Commit e80d0a1ae8bb8fee0edd37427836f108b30f596b

Author: Frederic Weisbecker
Parent: a634f93335

cputime: Rename thread_group_times to thread_group_cputime_adjusted

We have thread_group_cputime() and thread_group_times(). The naming
doesn't provide enough information about the difference between
these two APIs.

To reduce the confusion, rename thread_group_times() to
thread_group_cputime_adjusted(). This name better suggests that
it is a version of thread_group_cputime() that stabilizes the raw
cputime values, i.e. here: scaling them on top of the CFS runtime
stats and bounding them from below for monotonicity.
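
To make the distinction concrete, here is a minimal C sketch of what
"adjusted" means in this context. It is only an illustration: the names
prev_cputime_sketch and cputime_adjust_sketch are invented for the
example and the arithmetic is simplified; this is not the kernel's
actual implementation.

#include <stdint.h>

/* Hypothetical bookkeeping: the values most recently reported to user space. */
struct prev_cputime_sketch {
	uint64_t utime;
	uint64_t stime;
};

/*
 * Rescale the tick-sampled utime/stime so that their sum matches the
 * precise scheduler runtime (rtime, i.e. the CFS sum_exec_runtime), then
 * bound the results from below by the previously reported values so
 * successive reads never appear to go backwards.
 */
static void cputime_adjust_sketch(uint64_t raw_utime, uint64_t raw_stime,
				  uint64_t rtime,
				  struct prev_cputime_sketch *prev,
				  uint64_t *ut, uint64_t *st)
{
	uint64_t total = raw_utime + raw_stime;
	uint64_t utime, stime;

	if (total) {
		/* Split rtime in the same user/system ratio as the raw
		 * samples (64-bit overflow ignored for brevity). */
		utime = raw_utime * rtime / total;
		stime = rtime - utime;
	} else {
		utime = 0;
		stime = rtime;
	}

	/* Monotonicity: never report less than the previous snapshot. */
	if (utime < prev->utime)
		utime = prev->utime;
	if (stime < prev->stime)
		stime = prev->stime;

	prev->utime = *ut = utime;
	prev->stime = *st = stime;
}

With this framing, thread_group_cputime_adjusted() reads naturally as the
adjusted counterpart of thread_group_cputime(), which is exactly what the
rename is meant to convey; the same convention applies to the per-task
task_cputime_adjusted() seen in the diff below.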

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>

Showing 5 changed files with 13 additions and 13 deletions

fs/proc/array.c

1 /* 1 /*
2 * linux/fs/proc/array.c 2 * linux/fs/proc/array.c
3 * 3 *
4 * Copyright (C) 1992 by Linus Torvalds 4 * Copyright (C) 1992 by Linus Torvalds
5 * based on ideas by Darren Senn 5 * based on ideas by Darren Senn
6 * 6 *
7 * Fixes: 7 * Fixes:
8 * Michael. K. Johnson: stat,statm extensions. 8 * Michael. K. Johnson: stat,statm extensions.
9 * <johnsonm@stolaf.edu> 9 * <johnsonm@stolaf.edu>
10 * 10 *
11 * Pauline Middelink : Made cmdline,envline only break at '\0's, to 11 * Pauline Middelink : Made cmdline,envline only break at '\0's, to
12 * make sure SET_PROCTITLE works. Also removed 12 * make sure SET_PROCTITLE works. Also removed
13 * bad '!' which forced address recalculation for 13 * bad '!' which forced address recalculation for
14 * EVERY character on the current page. 14 * EVERY character on the current page.
15 * <middelin@polyware.iaf.nl> 15 * <middelin@polyware.iaf.nl>
16 * 16 *
17 * Danny ter Haar : added cpuinfo 17 * Danny ter Haar : added cpuinfo
18 * <dth@cistron.nl> 18 * <dth@cistron.nl>
19 * 19 *
20 * Alessandro Rubini : profile extension. 20 * Alessandro Rubini : profile extension.
21 * <rubini@ipvvis.unipv.it> 21 * <rubini@ipvvis.unipv.it>
22 * 22 *
23 * Jeff Tranter : added BogoMips field to cpuinfo 23 * Jeff Tranter : added BogoMips field to cpuinfo
24 * <Jeff_Tranter@Mitel.COM> 24 * <Jeff_Tranter@Mitel.COM>
25 * 25 *
26 * Bruno Haible : remove 4K limit for the maps file 26 * Bruno Haible : remove 4K limit for the maps file
27 * <haible@ma2s2.mathematik.uni-karlsruhe.de> 27 * <haible@ma2s2.mathematik.uni-karlsruhe.de>
28 * 28 *
29 * Yves Arrouye : remove removal of trailing spaces in get_array. 29 * Yves Arrouye : remove removal of trailing spaces in get_array.
30 * <Yves.Arrouye@marin.fdn.fr> 30 * <Yves.Arrouye@marin.fdn.fr>
31 * 31 *
32 * Jerome Forissier : added per-CPU time information to /proc/stat 32 * Jerome Forissier : added per-CPU time information to /proc/stat
33 * and /proc/<pid>/cpu extension 33 * and /proc/<pid>/cpu extension
34 * <forissier@isia.cma.fr> 34 * <forissier@isia.cma.fr>
35 * - Incorporation and non-SMP safe operation 35 * - Incorporation and non-SMP safe operation
36 * of forissier patch in 2.1.78 by 36 * of forissier patch in 2.1.78 by
37 * Hans Marcus <crowbar@concepts.nl> 37 * Hans Marcus <crowbar@concepts.nl>
38 * 38 *
39 * aeb@cwi.nl : /proc/partitions 39 * aeb@cwi.nl : /proc/partitions
40 * 40 *
41 * 41 *
42 * Alan Cox : security fixes. 42 * Alan Cox : security fixes.
43 * <alan@lxorguk.ukuu.org.uk> 43 * <alan@lxorguk.ukuu.org.uk>
44 * 44 *
45 * Al Viro : safe handling of mm_struct 45 * Al Viro : safe handling of mm_struct
46 * 46 *
47 * Gerhard Wichert : added BIGMEM support 47 * Gerhard Wichert : added BIGMEM support
48 * Siemens AG <Gerhard.Wichert@pdb.siemens.de> 48 * Siemens AG <Gerhard.Wichert@pdb.siemens.de>
49 * 49 *
50 * Al Viro & Jeff Garzik : moved most of the thing into base.c and 50 * Al Viro & Jeff Garzik : moved most of the thing into base.c and
51 * : proc_misc.c. The rest may eventually go into 51 * : proc_misc.c. The rest may eventually go into
52 * : base.c too. 52 * : base.c too.
53 */ 53 */
54 54
55 #include <linux/types.h> 55 #include <linux/types.h>
56 #include <linux/errno.h> 56 #include <linux/errno.h>
57 #include <linux/time.h> 57 #include <linux/time.h>
58 #include <linux/kernel.h> 58 #include <linux/kernel.h>
59 #include <linux/kernel_stat.h> 59 #include <linux/kernel_stat.h>
60 #include <linux/tty.h> 60 #include <linux/tty.h>
61 #include <linux/string.h> 61 #include <linux/string.h>
62 #include <linux/mman.h> 62 #include <linux/mman.h>
63 #include <linux/proc_fs.h> 63 #include <linux/proc_fs.h>
64 #include <linux/ioport.h> 64 #include <linux/ioport.h>
65 #include <linux/uaccess.h> 65 #include <linux/uaccess.h>
66 #include <linux/io.h> 66 #include <linux/io.h>
67 #include <linux/mm.h> 67 #include <linux/mm.h>
68 #include <linux/hugetlb.h> 68 #include <linux/hugetlb.h>
69 #include <linux/pagemap.h> 69 #include <linux/pagemap.h>
70 #include <linux/swap.h> 70 #include <linux/swap.h>
71 #include <linux/smp.h> 71 #include <linux/smp.h>
72 #include <linux/signal.h> 72 #include <linux/signal.h>
73 #include <linux/highmem.h> 73 #include <linux/highmem.h>
74 #include <linux/file.h> 74 #include <linux/file.h>
75 #include <linux/fdtable.h> 75 #include <linux/fdtable.h>
76 #include <linux/times.h> 76 #include <linux/times.h>
77 #include <linux/cpuset.h> 77 #include <linux/cpuset.h>
78 #include <linux/rcupdate.h> 78 #include <linux/rcupdate.h>
79 #include <linux/delayacct.h> 79 #include <linux/delayacct.h>
80 #include <linux/seq_file.h> 80 #include <linux/seq_file.h>
81 #include <linux/pid_namespace.h> 81 #include <linux/pid_namespace.h>
82 #include <linux/ptrace.h> 82 #include <linux/ptrace.h>
83 #include <linux/tracehook.h> 83 #include <linux/tracehook.h>
84 #include <linux/user_namespace.h> 84 #include <linux/user_namespace.h>
85 85
86 #include <asm/pgtable.h> 86 #include <asm/pgtable.h>
87 #include <asm/processor.h> 87 #include <asm/processor.h>
88 #include "internal.h" 88 #include "internal.h"
89 89
90 static inline void task_name(struct seq_file *m, struct task_struct *p) 90 static inline void task_name(struct seq_file *m, struct task_struct *p)
91 { 91 {
92 int i; 92 int i;
93 char *buf, *end; 93 char *buf, *end;
94 char *name; 94 char *name;
95 char tcomm[sizeof(p->comm)]; 95 char tcomm[sizeof(p->comm)];
96 96
97 get_task_comm(tcomm, p); 97 get_task_comm(tcomm, p);
98 98
99 seq_puts(m, "Name:\t"); 99 seq_puts(m, "Name:\t");
100 end = m->buf + m->size; 100 end = m->buf + m->size;
101 buf = m->buf + m->count; 101 buf = m->buf + m->count;
102 name = tcomm; 102 name = tcomm;
103 i = sizeof(tcomm); 103 i = sizeof(tcomm);
104 while (i && (buf < end)) { 104 while (i && (buf < end)) {
105 unsigned char c = *name; 105 unsigned char c = *name;
106 name++; 106 name++;
107 i--; 107 i--;
108 *buf = c; 108 *buf = c;
109 if (!c) 109 if (!c)
110 break; 110 break;
111 if (c == '\\') { 111 if (c == '\\') {
112 buf++; 112 buf++;
113 if (buf < end) 113 if (buf < end)
114 *buf++ = c; 114 *buf++ = c;
115 continue; 115 continue;
116 } 116 }
117 if (c == '\n') { 117 if (c == '\n') {
118 *buf++ = '\\'; 118 *buf++ = '\\';
119 if (buf < end) 119 if (buf < end)
120 *buf++ = 'n'; 120 *buf++ = 'n';
121 continue; 121 continue;
122 } 122 }
123 buf++; 123 buf++;
124 } 124 }
125 m->count = buf - m->buf; 125 m->count = buf - m->buf;
126 seq_putc(m, '\n'); 126 seq_putc(m, '\n');
127 } 127 }
128 128
129 /* 129 /*
130 * The task state array is a strange "bitmap" of 130 * The task state array is a strange "bitmap" of
131 * reasons to sleep. Thus "running" is zero, and 131 * reasons to sleep. Thus "running" is zero, and
132 * you can test for combinations of others with 132 * you can test for combinations of others with
133 * simple bit tests. 133 * simple bit tests.
134 */ 134 */
135 static const char * const task_state_array[] = { 135 static const char * const task_state_array[] = {
136 "R (running)", /* 0 */ 136 "R (running)", /* 0 */
137 "S (sleeping)", /* 1 */ 137 "S (sleeping)", /* 1 */
138 "D (disk sleep)", /* 2 */ 138 "D (disk sleep)", /* 2 */
139 "T (stopped)", /* 4 */ 139 "T (stopped)", /* 4 */
140 "t (tracing stop)", /* 8 */ 140 "t (tracing stop)", /* 8 */
141 "Z (zombie)", /* 16 */ 141 "Z (zombie)", /* 16 */
142 "X (dead)", /* 32 */ 142 "X (dead)", /* 32 */
143 "x (dead)", /* 64 */ 143 "x (dead)", /* 64 */
144 "K (wakekill)", /* 128 */ 144 "K (wakekill)", /* 128 */
145 "W (waking)", /* 256 */ 145 "W (waking)", /* 256 */
146 }; 146 };
147 147
148 static inline const char *get_task_state(struct task_struct *tsk) 148 static inline const char *get_task_state(struct task_struct *tsk)
149 { 149 {
150 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; 150 unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
151 const char * const *p = &task_state_array[0]; 151 const char * const *p = &task_state_array[0];
152 152
153 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); 153 BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
154 154
155 while (state) { 155 while (state) {
156 p++; 156 p++;
157 state >>= 1; 157 state >>= 1;
158 } 158 }
159 return *p; 159 return *p;
160 } 160 }
161 161
162 static inline void task_state(struct seq_file *m, struct pid_namespace *ns, 162 static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
163 struct pid *pid, struct task_struct *p) 163 struct pid *pid, struct task_struct *p)
164 { 164 {
165 struct user_namespace *user_ns = current_user_ns(); 165 struct user_namespace *user_ns = current_user_ns();
166 struct group_info *group_info; 166 struct group_info *group_info;
167 int g; 167 int g;
168 struct fdtable *fdt = NULL; 168 struct fdtable *fdt = NULL;
169 const struct cred *cred; 169 const struct cred *cred;
170 pid_t ppid, tpid; 170 pid_t ppid, tpid;
171 171
172 rcu_read_lock(); 172 rcu_read_lock();
173 ppid = pid_alive(p) ? 173 ppid = pid_alive(p) ?
174 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 174 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
175 tpid = 0; 175 tpid = 0;
176 if (pid_alive(p)) { 176 if (pid_alive(p)) {
177 struct task_struct *tracer = ptrace_parent(p); 177 struct task_struct *tracer = ptrace_parent(p);
178 if (tracer) 178 if (tracer)
179 tpid = task_pid_nr_ns(tracer, ns); 179 tpid = task_pid_nr_ns(tracer, ns);
180 } 180 }
181 cred = get_task_cred(p); 181 cred = get_task_cred(p);
182 seq_printf(m, 182 seq_printf(m,
183 "State:\t%s\n" 183 "State:\t%s\n"
184 "Tgid:\t%d\n" 184 "Tgid:\t%d\n"
185 "Pid:\t%d\n" 185 "Pid:\t%d\n"
186 "PPid:\t%d\n" 186 "PPid:\t%d\n"
187 "TracerPid:\t%d\n" 187 "TracerPid:\t%d\n"
188 "Uid:\t%d\t%d\t%d\t%d\n" 188 "Uid:\t%d\t%d\t%d\t%d\n"
189 "Gid:\t%d\t%d\t%d\t%d\n", 189 "Gid:\t%d\t%d\t%d\t%d\n",
190 get_task_state(p), 190 get_task_state(p),
191 task_tgid_nr_ns(p, ns), 191 task_tgid_nr_ns(p, ns),
192 pid_nr_ns(pid, ns), 192 pid_nr_ns(pid, ns),
193 ppid, tpid, 193 ppid, tpid,
194 from_kuid_munged(user_ns, cred->uid), 194 from_kuid_munged(user_ns, cred->uid),
195 from_kuid_munged(user_ns, cred->euid), 195 from_kuid_munged(user_ns, cred->euid),
196 from_kuid_munged(user_ns, cred->suid), 196 from_kuid_munged(user_ns, cred->suid),
197 from_kuid_munged(user_ns, cred->fsuid), 197 from_kuid_munged(user_ns, cred->fsuid),
198 from_kgid_munged(user_ns, cred->gid), 198 from_kgid_munged(user_ns, cred->gid),
199 from_kgid_munged(user_ns, cred->egid), 199 from_kgid_munged(user_ns, cred->egid),
200 from_kgid_munged(user_ns, cred->sgid), 200 from_kgid_munged(user_ns, cred->sgid),
201 from_kgid_munged(user_ns, cred->fsgid)); 201 from_kgid_munged(user_ns, cred->fsgid));
202 202
203 task_lock(p); 203 task_lock(p);
204 if (p->files) 204 if (p->files)
205 fdt = files_fdtable(p->files); 205 fdt = files_fdtable(p->files);
206 seq_printf(m, 206 seq_printf(m,
207 "FDSize:\t%d\n" 207 "FDSize:\t%d\n"
208 "Groups:\t", 208 "Groups:\t",
209 fdt ? fdt->max_fds : 0); 209 fdt ? fdt->max_fds : 0);
210 rcu_read_unlock(); 210 rcu_read_unlock();
211 211
212 group_info = cred->group_info; 212 group_info = cred->group_info;
213 task_unlock(p); 213 task_unlock(p);
214 214
215 for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) 215 for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
216 seq_printf(m, "%d ", 216 seq_printf(m, "%d ",
217 from_kgid_munged(user_ns, GROUP_AT(group_info, g))); 217 from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
218 put_cred(cred); 218 put_cred(cred);
219 219
220 seq_putc(m, '\n'); 220 seq_putc(m, '\n');
221 } 221 }
222 222
223 static void render_sigset_t(struct seq_file *m, const char *header, 223 static void render_sigset_t(struct seq_file *m, const char *header,
224 sigset_t *set) 224 sigset_t *set)
225 { 225 {
226 int i; 226 int i;
227 227
228 seq_puts(m, header); 228 seq_puts(m, header);
229 229
230 i = _NSIG; 230 i = _NSIG;
231 do { 231 do {
232 int x = 0; 232 int x = 0;
233 233
234 i -= 4; 234 i -= 4;
235 if (sigismember(set, i+1)) x |= 1; 235 if (sigismember(set, i+1)) x |= 1;
236 if (sigismember(set, i+2)) x |= 2; 236 if (sigismember(set, i+2)) x |= 2;
237 if (sigismember(set, i+3)) x |= 4; 237 if (sigismember(set, i+3)) x |= 4;
238 if (sigismember(set, i+4)) x |= 8; 238 if (sigismember(set, i+4)) x |= 8;
239 seq_printf(m, "%x", x); 239 seq_printf(m, "%x", x);
240 } while (i >= 4); 240 } while (i >= 4);
241 241
242 seq_putc(m, '\n'); 242 seq_putc(m, '\n');
243 } 243 }
244 244
245 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, 245 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
246 sigset_t *catch) 246 sigset_t *catch)
247 { 247 {
248 struct k_sigaction *k; 248 struct k_sigaction *k;
249 int i; 249 int i;
250 250
251 k = p->sighand->action; 251 k = p->sighand->action;
252 for (i = 1; i <= _NSIG; ++i, ++k) { 252 for (i = 1; i <= _NSIG; ++i, ++k) {
253 if (k->sa.sa_handler == SIG_IGN) 253 if (k->sa.sa_handler == SIG_IGN)
254 sigaddset(ign, i); 254 sigaddset(ign, i);
255 else if (k->sa.sa_handler != SIG_DFL) 255 else if (k->sa.sa_handler != SIG_DFL)
256 sigaddset(catch, i); 256 sigaddset(catch, i);
257 } 257 }
258 } 258 }
259 259
260 static inline void task_sig(struct seq_file *m, struct task_struct *p) 260 static inline void task_sig(struct seq_file *m, struct task_struct *p)
261 { 261 {
262 unsigned long flags; 262 unsigned long flags;
263 sigset_t pending, shpending, blocked, ignored, caught; 263 sigset_t pending, shpending, blocked, ignored, caught;
264 int num_threads = 0; 264 int num_threads = 0;
265 unsigned long qsize = 0; 265 unsigned long qsize = 0;
266 unsigned long qlim = 0; 266 unsigned long qlim = 0;
267 267
268 sigemptyset(&pending); 268 sigemptyset(&pending);
269 sigemptyset(&shpending); 269 sigemptyset(&shpending);
270 sigemptyset(&blocked); 270 sigemptyset(&blocked);
271 sigemptyset(&ignored); 271 sigemptyset(&ignored);
272 sigemptyset(&caught); 272 sigemptyset(&caught);
273 273
274 if (lock_task_sighand(p, &flags)) { 274 if (lock_task_sighand(p, &flags)) {
275 pending = p->pending.signal; 275 pending = p->pending.signal;
276 shpending = p->signal->shared_pending.signal; 276 shpending = p->signal->shared_pending.signal;
277 blocked = p->blocked; 277 blocked = p->blocked;
278 collect_sigign_sigcatch(p, &ignored, &caught); 278 collect_sigign_sigcatch(p, &ignored, &caught);
279 num_threads = get_nr_threads(p); 279 num_threads = get_nr_threads(p);
280 rcu_read_lock(); /* FIXME: is this correct? */ 280 rcu_read_lock(); /* FIXME: is this correct? */
281 qsize = atomic_read(&__task_cred(p)->user->sigpending); 281 qsize = atomic_read(&__task_cred(p)->user->sigpending);
282 rcu_read_unlock(); 282 rcu_read_unlock();
283 qlim = task_rlimit(p, RLIMIT_SIGPENDING); 283 qlim = task_rlimit(p, RLIMIT_SIGPENDING);
284 unlock_task_sighand(p, &flags); 284 unlock_task_sighand(p, &flags);
285 } 285 }
286 286
287 seq_printf(m, "Threads:\t%d\n", num_threads); 287 seq_printf(m, "Threads:\t%d\n", num_threads);
288 seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); 288 seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
289 289
290 /* render them all */ 290 /* render them all */
291 render_sigset_t(m, "SigPnd:\t", &pending); 291 render_sigset_t(m, "SigPnd:\t", &pending);
292 render_sigset_t(m, "ShdPnd:\t", &shpending); 292 render_sigset_t(m, "ShdPnd:\t", &shpending);
293 render_sigset_t(m, "SigBlk:\t", &blocked); 293 render_sigset_t(m, "SigBlk:\t", &blocked);
294 render_sigset_t(m, "SigIgn:\t", &ignored); 294 render_sigset_t(m, "SigIgn:\t", &ignored);
295 render_sigset_t(m, "SigCgt:\t", &caught); 295 render_sigset_t(m, "SigCgt:\t", &caught);
296 } 296 }
297 297
298 static void render_cap_t(struct seq_file *m, const char *header, 298 static void render_cap_t(struct seq_file *m, const char *header,
299 kernel_cap_t *a) 299 kernel_cap_t *a)
300 { 300 {
301 unsigned __capi; 301 unsigned __capi;
302 302
303 seq_puts(m, header); 303 seq_puts(m, header);
304 CAP_FOR_EACH_U32(__capi) { 304 CAP_FOR_EACH_U32(__capi) {
305 seq_printf(m, "%08x", 305 seq_printf(m, "%08x",
306 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); 306 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
307 } 307 }
308 seq_putc(m, '\n'); 308 seq_putc(m, '\n');
309 } 309 }
310 310
311 static inline void task_cap(struct seq_file *m, struct task_struct *p) 311 static inline void task_cap(struct seq_file *m, struct task_struct *p)
312 { 312 {
313 const struct cred *cred; 313 const struct cred *cred;
314 kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset; 314 kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset;
315 315
316 rcu_read_lock(); 316 rcu_read_lock();
317 cred = __task_cred(p); 317 cred = __task_cred(p);
318 cap_inheritable = cred->cap_inheritable; 318 cap_inheritable = cred->cap_inheritable;
319 cap_permitted = cred->cap_permitted; 319 cap_permitted = cred->cap_permitted;
320 cap_effective = cred->cap_effective; 320 cap_effective = cred->cap_effective;
321 cap_bset = cred->cap_bset; 321 cap_bset = cred->cap_bset;
322 rcu_read_unlock(); 322 rcu_read_unlock();
323 323
324 render_cap_t(m, "CapInh:\t", &cap_inheritable); 324 render_cap_t(m, "CapInh:\t", &cap_inheritable);
325 render_cap_t(m, "CapPrm:\t", &cap_permitted); 325 render_cap_t(m, "CapPrm:\t", &cap_permitted);
326 render_cap_t(m, "CapEff:\t", &cap_effective); 326 render_cap_t(m, "CapEff:\t", &cap_effective);
327 render_cap_t(m, "CapBnd:\t", &cap_bset); 327 render_cap_t(m, "CapBnd:\t", &cap_bset);
328 } 328 }
329 329
330 static inline void task_context_switch_counts(struct seq_file *m, 330 static inline void task_context_switch_counts(struct seq_file *m,
331 struct task_struct *p) 331 struct task_struct *p)
332 { 332 {
333 seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" 333 seq_printf(m, "voluntary_ctxt_switches:\t%lu\n"
334 "nonvoluntary_ctxt_switches:\t%lu\n", 334 "nonvoluntary_ctxt_switches:\t%lu\n",
335 p->nvcsw, 335 p->nvcsw,
336 p->nivcsw); 336 p->nivcsw);
337 } 337 }
338 338
339 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 339 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
340 { 340 {
341 seq_puts(m, "Cpus_allowed:\t"); 341 seq_puts(m, "Cpus_allowed:\t");
342 seq_cpumask(m, &task->cpus_allowed); 342 seq_cpumask(m, &task->cpus_allowed);
343 seq_putc(m, '\n'); 343 seq_putc(m, '\n');
344 seq_puts(m, "Cpus_allowed_list:\t"); 344 seq_puts(m, "Cpus_allowed_list:\t");
345 seq_cpumask_list(m, &task->cpus_allowed); 345 seq_cpumask_list(m, &task->cpus_allowed);
346 seq_putc(m, '\n'); 346 seq_putc(m, '\n');
347 } 347 }
348 348
349 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 349 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
350 struct pid *pid, struct task_struct *task) 350 struct pid *pid, struct task_struct *task)
351 { 351 {
352 struct mm_struct *mm = get_task_mm(task); 352 struct mm_struct *mm = get_task_mm(task);
353 353
354 task_name(m, task); 354 task_name(m, task);
355 task_state(m, ns, pid, task); 355 task_state(m, ns, pid, task);
356 356
357 if (mm) { 357 if (mm) {
358 task_mem(m, mm); 358 task_mem(m, mm);
359 mmput(mm); 359 mmput(mm);
360 } 360 }
361 task_sig(m, task); 361 task_sig(m, task);
362 task_cap(m, task); 362 task_cap(m, task);
363 task_cpus_allowed(m, task); 363 task_cpus_allowed(m, task);
364 cpuset_task_status_allowed(m, task); 364 cpuset_task_status_allowed(m, task);
365 task_context_switch_counts(m, task); 365 task_context_switch_counts(m, task);
366 return 0; 366 return 0;
367 } 367 }
368 368
369 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, 369 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
370 struct pid *pid, struct task_struct *task, int whole) 370 struct pid *pid, struct task_struct *task, int whole)
371 { 371 {
372 unsigned long vsize, eip, esp, wchan = ~0UL; 372 unsigned long vsize, eip, esp, wchan = ~0UL;
373 int priority, nice; 373 int priority, nice;
374 int tty_pgrp = -1, tty_nr = 0; 374 int tty_pgrp = -1, tty_nr = 0;
375 sigset_t sigign, sigcatch; 375 sigset_t sigign, sigcatch;
376 char state; 376 char state;
377 pid_t ppid = 0, pgid = -1, sid = -1; 377 pid_t ppid = 0, pgid = -1, sid = -1;
378 int num_threads = 0; 378 int num_threads = 0;
379 int permitted; 379 int permitted;
380 struct mm_struct *mm; 380 struct mm_struct *mm;
381 unsigned long long start_time; 381 unsigned long long start_time;
382 unsigned long cmin_flt = 0, cmaj_flt = 0; 382 unsigned long cmin_flt = 0, cmaj_flt = 0;
383 unsigned long min_flt = 0, maj_flt = 0; 383 unsigned long min_flt = 0, maj_flt = 0;
384 cputime_t cutime, cstime, utime, stime; 384 cputime_t cutime, cstime, utime, stime;
385 cputime_t cgtime, gtime; 385 cputime_t cgtime, gtime;
386 unsigned long rsslim = 0; 386 unsigned long rsslim = 0;
387 char tcomm[sizeof(task->comm)]; 387 char tcomm[sizeof(task->comm)];
388 unsigned long flags; 388 unsigned long flags;
389 389
390 state = *get_task_state(task); 390 state = *get_task_state(task);
391 vsize = eip = esp = 0; 391 vsize = eip = esp = 0;
392 permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); 392 permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
393 mm = get_task_mm(task); 393 mm = get_task_mm(task);
394 if (mm) { 394 if (mm) {
395 vsize = task_vsize(mm); 395 vsize = task_vsize(mm);
396 if (permitted) { 396 if (permitted) {
397 eip = KSTK_EIP(task); 397 eip = KSTK_EIP(task);
398 esp = KSTK_ESP(task); 398 esp = KSTK_ESP(task);
399 } 399 }
400 } 400 }
401 401
402 get_task_comm(tcomm, task); 402 get_task_comm(tcomm, task);
403 403
404 sigemptyset(&sigign); 404 sigemptyset(&sigign);
405 sigemptyset(&sigcatch); 405 sigemptyset(&sigcatch);
406 cutime = cstime = utime = stime = 0; 406 cutime = cstime = utime = stime = 0;
407 cgtime = gtime = 0; 407 cgtime = gtime = 0;
408 408
409 if (lock_task_sighand(task, &flags)) { 409 if (lock_task_sighand(task, &flags)) {
410 struct signal_struct *sig = task->signal; 410 struct signal_struct *sig = task->signal;
411 411
412 if (sig->tty) { 412 if (sig->tty) {
413 struct pid *pgrp = tty_get_pgrp(sig->tty); 413 struct pid *pgrp = tty_get_pgrp(sig->tty);
414 tty_pgrp = pid_nr_ns(pgrp, ns); 414 tty_pgrp = pid_nr_ns(pgrp, ns);
415 put_pid(pgrp); 415 put_pid(pgrp);
416 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 416 tty_nr = new_encode_dev(tty_devnum(sig->tty));
417 } 417 }
418 418
419 num_threads = get_nr_threads(task); 419 num_threads = get_nr_threads(task);
420 collect_sigign_sigcatch(task, &sigign, &sigcatch); 420 collect_sigign_sigcatch(task, &sigign, &sigcatch);
421 421
422 cmin_flt = sig->cmin_flt; 422 cmin_flt = sig->cmin_flt;
423 cmaj_flt = sig->cmaj_flt; 423 cmaj_flt = sig->cmaj_flt;
424 cutime = sig->cutime; 424 cutime = sig->cutime;
425 cstime = sig->cstime; 425 cstime = sig->cstime;
426 cgtime = sig->cgtime; 426 cgtime = sig->cgtime;
427 rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); 427 rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
428 428
429 /* add up live thread stats at the group level */ 429 /* add up live thread stats at the group level */
430 if (whole) { 430 if (whole) {
431 struct task_struct *t = task; 431 struct task_struct *t = task;
432 do { 432 do {
433 min_flt += t->min_flt; 433 min_flt += t->min_flt;
434 maj_flt += t->maj_flt; 434 maj_flt += t->maj_flt;
435 gtime += t->gtime; 435 gtime += t->gtime;
436 t = next_thread(t); 436 t = next_thread(t);
437 } while (t != task); 437 } while (t != task);
438 438
439 min_flt += sig->min_flt; 439 min_flt += sig->min_flt;
440 maj_flt += sig->maj_flt; 440 maj_flt += sig->maj_flt;
441 thread_group_times(task, &utime, &stime); 441 thread_group_cputime_adjusted(task, &utime, &stime);
442 gtime += sig->gtime; 442 gtime += sig->gtime;
443 } 443 }
444 444
445 sid = task_session_nr_ns(task, ns); 445 sid = task_session_nr_ns(task, ns);
446 ppid = task_tgid_nr_ns(task->real_parent, ns); 446 ppid = task_tgid_nr_ns(task->real_parent, ns);
447 pgid = task_pgrp_nr_ns(task, ns); 447 pgid = task_pgrp_nr_ns(task, ns);
448 448
449 unlock_task_sighand(task, &flags); 449 unlock_task_sighand(task, &flags);
450 } 450 }
451 451
452 if (permitted && (!whole || num_threads < 2)) 452 if (permitted && (!whole || num_threads < 2))
453 wchan = get_wchan(task); 453 wchan = get_wchan(task);
454 if (!whole) { 454 if (!whole) {
455 min_flt = task->min_flt; 455 min_flt = task->min_flt;
456 maj_flt = task->maj_flt; 456 maj_flt = task->maj_flt;
457 task_times(task, &utime, &stime); 457 task_cputime_adjusted(task, &utime, &stime);
458 gtime = task->gtime; 458 gtime = task->gtime;
459 } 459 }
460 460
461 /* scale priority and nice values from timeslices to -20..20 */ 461 /* scale priority and nice values from timeslices to -20..20 */
462 /* to make it look like a "normal" Unix priority/nice value */ 462 /* to make it look like a "normal" Unix priority/nice value */
463 priority = task_prio(task); 463 priority = task_prio(task);
464 nice = task_nice(task); 464 nice = task_nice(task);
465 465
466 /* Temporary variable needed for gcc-2.96 */ 466 /* Temporary variable needed for gcc-2.96 */
467 /* convert timespec -> nsec*/ 467 /* convert timespec -> nsec*/
468 start_time = 468 start_time =
469 (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC 469 (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
470 + task->real_start_time.tv_nsec; 470 + task->real_start_time.tv_nsec;
471 /* convert nsec -> ticks */ 471 /* convert nsec -> ticks */
472 start_time = nsec_to_clock_t(start_time); 472 start_time = nsec_to_clock_t(start_time);
473 473
474 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); 474 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
475 seq_put_decimal_ll(m, ' ', ppid); 475 seq_put_decimal_ll(m, ' ', ppid);
476 seq_put_decimal_ll(m, ' ', pgid); 476 seq_put_decimal_ll(m, ' ', pgid);
477 seq_put_decimal_ll(m, ' ', sid); 477 seq_put_decimal_ll(m, ' ', sid);
478 seq_put_decimal_ll(m, ' ', tty_nr); 478 seq_put_decimal_ll(m, ' ', tty_nr);
479 seq_put_decimal_ll(m, ' ', tty_pgrp); 479 seq_put_decimal_ll(m, ' ', tty_pgrp);
480 seq_put_decimal_ull(m, ' ', task->flags); 480 seq_put_decimal_ull(m, ' ', task->flags);
481 seq_put_decimal_ull(m, ' ', min_flt); 481 seq_put_decimal_ull(m, ' ', min_flt);
482 seq_put_decimal_ull(m, ' ', cmin_flt); 482 seq_put_decimal_ull(m, ' ', cmin_flt);
483 seq_put_decimal_ull(m, ' ', maj_flt); 483 seq_put_decimal_ull(m, ' ', maj_flt);
484 seq_put_decimal_ull(m, ' ', cmaj_flt); 484 seq_put_decimal_ull(m, ' ', cmaj_flt);
485 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); 485 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
486 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); 486 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
487 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); 487 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
488 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); 488 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
489 seq_put_decimal_ll(m, ' ', priority); 489 seq_put_decimal_ll(m, ' ', priority);
490 seq_put_decimal_ll(m, ' ', nice); 490 seq_put_decimal_ll(m, ' ', nice);
491 seq_put_decimal_ll(m, ' ', num_threads); 491 seq_put_decimal_ll(m, ' ', num_threads);
492 seq_put_decimal_ull(m, ' ', 0); 492 seq_put_decimal_ull(m, ' ', 0);
493 seq_put_decimal_ull(m, ' ', start_time); 493 seq_put_decimal_ull(m, ' ', start_time);
494 seq_put_decimal_ull(m, ' ', vsize); 494 seq_put_decimal_ull(m, ' ', vsize);
495 seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); 495 seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
496 seq_put_decimal_ull(m, ' ', rsslim); 496 seq_put_decimal_ull(m, ' ', rsslim);
497 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); 497 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
498 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); 498 seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
499 seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); 499 seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
500 seq_put_decimal_ull(m, ' ', esp); 500 seq_put_decimal_ull(m, ' ', esp);
501 seq_put_decimal_ull(m, ' ', eip); 501 seq_put_decimal_ull(m, ' ', eip);
502 /* The signal information here is obsolete. 502 /* The signal information here is obsolete.
503 * It must be decimal for Linux 2.0 compatibility. 503 * It must be decimal for Linux 2.0 compatibility.
504 * Use /proc/#/status for real-time signals. 504 * Use /proc/#/status for real-time signals.
505 */ 505 */
506 seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); 506 seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
507 seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); 507 seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
508 seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); 508 seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
509 seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); 509 seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
510 seq_put_decimal_ull(m, ' ', wchan); 510 seq_put_decimal_ull(m, ' ', wchan);
511 seq_put_decimal_ull(m, ' ', 0); 511 seq_put_decimal_ull(m, ' ', 0);
512 seq_put_decimal_ull(m, ' ', 0); 512 seq_put_decimal_ull(m, ' ', 0);
513 seq_put_decimal_ll(m, ' ', task->exit_signal); 513 seq_put_decimal_ll(m, ' ', task->exit_signal);
514 seq_put_decimal_ll(m, ' ', task_cpu(task)); 514 seq_put_decimal_ll(m, ' ', task_cpu(task));
515 seq_put_decimal_ull(m, ' ', task->rt_priority); 515 seq_put_decimal_ull(m, ' ', task->rt_priority);
516 seq_put_decimal_ull(m, ' ', task->policy); 516 seq_put_decimal_ull(m, ' ', task->policy);
517 seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); 517 seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
518 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); 518 seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
519 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); 519 seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
520 520
521 if (mm && permitted) { 521 if (mm && permitted) {
522 seq_put_decimal_ull(m, ' ', mm->start_data); 522 seq_put_decimal_ull(m, ' ', mm->start_data);
523 seq_put_decimal_ull(m, ' ', mm->end_data); 523 seq_put_decimal_ull(m, ' ', mm->end_data);
524 seq_put_decimal_ull(m, ' ', mm->start_brk); 524 seq_put_decimal_ull(m, ' ', mm->start_brk);
525 seq_put_decimal_ull(m, ' ', mm->arg_start); 525 seq_put_decimal_ull(m, ' ', mm->arg_start);
526 seq_put_decimal_ull(m, ' ', mm->arg_end); 526 seq_put_decimal_ull(m, ' ', mm->arg_end);
527 seq_put_decimal_ull(m, ' ', mm->env_start); 527 seq_put_decimal_ull(m, ' ', mm->env_start);
528 seq_put_decimal_ull(m, ' ', mm->env_end); 528 seq_put_decimal_ull(m, ' ', mm->env_end);
529 } else 529 } else
530 seq_printf(m, " 0 0 0 0 0 0 0"); 530 seq_printf(m, " 0 0 0 0 0 0 0");
531 531
532 if (permitted) 532 if (permitted)
533 seq_put_decimal_ll(m, ' ', task->exit_code); 533 seq_put_decimal_ll(m, ' ', task->exit_code);
534 else 534 else
535 seq_put_decimal_ll(m, ' ', 0); 535 seq_put_decimal_ll(m, ' ', 0);
536 536
537 seq_putc(m, '\n'); 537 seq_putc(m, '\n');
538 if (mm) 538 if (mm)
539 mmput(mm); 539 mmput(mm);
540 return 0; 540 return 0;
541 } 541 }
542 542
543 int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 543 int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
544 struct pid *pid, struct task_struct *task) 544 struct pid *pid, struct task_struct *task)
545 { 545 {
546 return do_task_stat(m, ns, pid, task, 0); 546 return do_task_stat(m, ns, pid, task, 0);
547 } 547 }
548 548
549 int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 549 int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
550 struct pid *pid, struct task_struct *task) 550 struct pid *pid, struct task_struct *task)
551 { 551 {
552 return do_task_stat(m, ns, pid, task, 1); 552 return do_task_stat(m, ns, pid, task, 1);
553 } 553 }
554 554
555 int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, 555 int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
556 struct pid *pid, struct task_struct *task) 556 struct pid *pid, struct task_struct *task)
557 { 557 {
558 unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0; 558 unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
559 struct mm_struct *mm = get_task_mm(task); 559 struct mm_struct *mm = get_task_mm(task);
560 560
561 if (mm) { 561 if (mm) {
562 size = task_statm(mm, &shared, &text, &data, &resident); 562 size = task_statm(mm, &shared, &text, &data, &resident);
563 mmput(mm); 563 mmput(mm);
564 } 564 }
565 /* 565 /*
566 * For quick read, open code by putting numbers directly 566 * For quick read, open code by putting numbers directly
567 * expected format is 567 * expected format is
568 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", 568 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
569 * size, resident, shared, text, data); 569 * size, resident, shared, text, data);
570 */ 570 */
571 seq_put_decimal_ull(m, 0, size); 571 seq_put_decimal_ull(m, 0, size);
572 seq_put_decimal_ull(m, ' ', resident); 572 seq_put_decimal_ull(m, ' ', resident);
573 seq_put_decimal_ull(m, ' ', shared); 573 seq_put_decimal_ull(m, ' ', shared);
574 seq_put_decimal_ull(m, ' ', text); 574 seq_put_decimal_ull(m, ' ', text);
575 seq_put_decimal_ull(m, ' ', 0); 575 seq_put_decimal_ull(m, ' ', 0);
576 seq_put_decimal_ull(m, ' ', data); 576 seq_put_decimal_ull(m, ' ', data);
577 seq_put_decimal_ull(m, ' ', 0); 577 seq_put_decimal_ull(m, ' ', 0);
578 seq_putc(m, '\n'); 578 seq_putc(m, '\n');
579 579
580 return 0; 580 return 0;
581 } 581 }
582 582
583 #ifdef CONFIG_CHECKPOINT_RESTORE 583 #ifdef CONFIG_CHECKPOINT_RESTORE
584 static struct pid * 584 static struct pid *
585 get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) 585 get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
586 { 586 {
587 struct task_struct *start, *task; 587 struct task_struct *start, *task;
588 struct pid *pid = NULL; 588 struct pid *pid = NULL;
589 589
590 read_lock(&tasklist_lock); 590 read_lock(&tasklist_lock);
591 591
592 start = pid_task(proc_pid(inode), PIDTYPE_PID); 592 start = pid_task(proc_pid(inode), PIDTYPE_PID);
593 if (!start) 593 if (!start)
594 goto out; 594 goto out;
595 595
596 /* 596 /*
597 * Lets try to continue searching first, this gives 597 * Lets try to continue searching first, this gives
598 * us significant speedup on children-rich processes. 598 * us significant speedup on children-rich processes.
599 */ 599 */
600 if (pid_prev) { 600 if (pid_prev) {
601 task = pid_task(pid_prev, PIDTYPE_PID); 601 task = pid_task(pid_prev, PIDTYPE_PID);
602 if (task && task->real_parent == start && 602 if (task && task->real_parent == start &&
603 !(list_empty(&task->sibling))) { 603 !(list_empty(&task->sibling))) {
604 if (list_is_last(&task->sibling, &start->children)) 604 if (list_is_last(&task->sibling, &start->children))
605 goto out; 605 goto out;
606 task = list_first_entry(&task->sibling, 606 task = list_first_entry(&task->sibling,
607 struct task_struct, sibling); 607 struct task_struct, sibling);
608 pid = get_pid(task_pid(task)); 608 pid = get_pid(task_pid(task));
609 goto out; 609 goto out;
610 } 610 }
611 } 611 }
612 612
613 /* 613 /*
614 * Slow search case. 614 * Slow search case.
615 * 615 *
616 * We might miss some children here if children 616 * We might miss some children here if children
617 * are exited while we were not holding the lock, 617 * are exited while we were not holding the lock,
618 * but it was never promised to be accurate that 618 * but it was never promised to be accurate that
619 * much. 619 * much.
620 * 620 *
621 * "Just suppose that the parent sleeps, but N children 621 * "Just suppose that the parent sleeps, but N children
622 * exit after we printed their tids. Now the slow paths 622 * exit after we printed their tids. Now the slow paths
623 * skips N extra children, we miss N tasks." (c) 623 * skips N extra children, we miss N tasks." (c)
624 * 624 *
625 * So one need to stop or freeze the leader and all 625 * So one need to stop or freeze the leader and all
626 * its children to get a precise result. 626 * its children to get a precise result.
627 */ 627 */
628 list_for_each_entry(task, &start->children, sibling) { 628 list_for_each_entry(task, &start->children, sibling) {
629 if (pos-- == 0) { 629 if (pos-- == 0) {
630 pid = get_pid(task_pid(task)); 630 pid = get_pid(task_pid(task));
631 break; 631 break;
632 } 632 }
633 } 633 }
634 634
635 out: 635 out:
636 read_unlock(&tasklist_lock); 636 read_unlock(&tasklist_lock);
637 return pid; 637 return pid;
638 } 638 }
639 639
640 static int children_seq_show(struct seq_file *seq, void *v) 640 static int children_seq_show(struct seq_file *seq, void *v)
641 { 641 {
642 struct inode *inode = seq->private; 642 struct inode *inode = seq->private;
643 pid_t pid; 643 pid_t pid;
644 644
645 pid = pid_nr_ns(v, inode->i_sb->s_fs_info); 645 pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
646 return seq_printf(seq, "%d ", pid); 646 return seq_printf(seq, "%d ", pid);
647 } 647 }
648 648
649 static void *children_seq_start(struct seq_file *seq, loff_t *pos) 649 static void *children_seq_start(struct seq_file *seq, loff_t *pos)
650 { 650 {
651 return get_children_pid(seq->private, NULL, *pos); 651 return get_children_pid(seq->private, NULL, *pos);
652 } 652 }
653 653
654 static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) 654 static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
655 { 655 {
656 struct pid *pid; 656 struct pid *pid;
657 657
658 pid = get_children_pid(seq->private, v, *pos + 1); 658 pid = get_children_pid(seq->private, v, *pos + 1);
659 put_pid(v); 659 put_pid(v);
660 660
661 ++*pos; 661 ++*pos;
662 return pid; 662 return pid;
663 } 663 }
664 664
665 static void children_seq_stop(struct seq_file *seq, void *v) 665 static void children_seq_stop(struct seq_file *seq, void *v)
666 { 666 {
667 put_pid(v); 667 put_pid(v);
668 } 668 }
669 669
670 static const struct seq_operations children_seq_ops = { 670 static const struct seq_operations children_seq_ops = {
671 .start = children_seq_start, 671 .start = children_seq_start,
672 .next = children_seq_next, 672 .next = children_seq_next,
673 .stop = children_seq_stop, 673 .stop = children_seq_stop,
674 .show = children_seq_show, 674 .show = children_seq_show,
675 }; 675 };
676 676
677 static int children_seq_open(struct inode *inode, struct file *file) 677 static int children_seq_open(struct inode *inode, struct file *file)
678 { 678 {
679 struct seq_file *m; 679 struct seq_file *m;
680 int ret; 680 int ret;
681 681
682 ret = seq_open(file, &children_seq_ops); 682 ret = seq_open(file, &children_seq_ops);
683 if (ret) 683 if (ret)
684 return ret; 684 return ret;
685 685
686 m = file->private_data; 686 m = file->private_data;
687 m->private = inode; 687 m->private = inode;
688 688
689 return ret; 689 return ret;
690 } 690 }
691 691
692 int children_seq_release(struct inode *inode, struct file *file) 692 int children_seq_release(struct inode *inode, struct file *file)
693 { 693 {
694 seq_release(inode, file); 694 seq_release(inode, file);
695 return 0; 695 return 0;
696 } 696 }
697 697
698 const struct file_operations proc_tid_children_operations = { 698 const struct file_operations proc_tid_children_operations = {
699 .open = children_seq_open, 699 .open = children_seq_open,
700 .read = seq_read, 700 .read = seq_read,
701 .llseek = seq_lseek, 701 .llseek = seq_lseek,
702 .release = children_seq_release, 702 .release = children_seq_release,
703 }; 703 };
704 #endif /* CONFIG_CHECKPOINT_RESTORE */ 704 #endif /* CONFIG_CHECKPOINT_RESTORE */
705 705
include/linux/sched.h
1 #ifndef _LINUX_SCHED_H 1 #ifndef _LINUX_SCHED_H
2 #define _LINUX_SCHED_H 2 #define _LINUX_SCHED_H
3 3
4 #include <uapi/linux/sched.h> 4 #include <uapi/linux/sched.h>
5 5
6 6
7 struct sched_param { 7 struct sched_param {
8 int sched_priority; 8 int sched_priority;
9 }; 9 };
10 10
11 #include <asm/param.h> /* for HZ */ 11 #include <asm/param.h> /* for HZ */
12 12
13 #include <linux/capability.h> 13 #include <linux/capability.h>
14 #include <linux/threads.h> 14 #include <linux/threads.h>
15 #include <linux/kernel.h> 15 #include <linux/kernel.h>
16 #include <linux/types.h> 16 #include <linux/types.h>
17 #include <linux/timex.h> 17 #include <linux/timex.h>
18 #include <linux/jiffies.h> 18 #include <linux/jiffies.h>
19 #include <linux/rbtree.h> 19 #include <linux/rbtree.h>
20 #include <linux/thread_info.h> 20 #include <linux/thread_info.h>
21 #include <linux/cpumask.h> 21 #include <linux/cpumask.h>
22 #include <linux/errno.h> 22 #include <linux/errno.h>
23 #include <linux/nodemask.h> 23 #include <linux/nodemask.h>
24 #include <linux/mm_types.h> 24 #include <linux/mm_types.h>
25 25
26 #include <asm/page.h> 26 #include <asm/page.h>
27 #include <asm/ptrace.h> 27 #include <asm/ptrace.h>
28 #include <asm/cputime.h> 28 #include <asm/cputime.h>
29 29
30 #include <linux/smp.h> 30 #include <linux/smp.h>
31 #include <linux/sem.h> 31 #include <linux/sem.h>
32 #include <linux/signal.h> 32 #include <linux/signal.h>
33 #include <linux/compiler.h> 33 #include <linux/compiler.h>
34 #include <linux/completion.h> 34 #include <linux/completion.h>
35 #include <linux/pid.h> 35 #include <linux/pid.h>
36 #include <linux/percpu.h> 36 #include <linux/percpu.h>
37 #include <linux/topology.h> 37 #include <linux/topology.h>
38 #include <linux/proportions.h> 38 #include <linux/proportions.h>
39 #include <linux/seccomp.h> 39 #include <linux/seccomp.h>
40 #include <linux/rcupdate.h> 40 #include <linux/rcupdate.h>
41 #include <linux/rculist.h> 41 #include <linux/rculist.h>
42 #include <linux/rtmutex.h> 42 #include <linux/rtmutex.h>
43 43
44 #include <linux/time.h> 44 #include <linux/time.h>
45 #include <linux/param.h> 45 #include <linux/param.h>
46 #include <linux/resource.h> 46 #include <linux/resource.h>
47 #include <linux/timer.h> 47 #include <linux/timer.h>
48 #include <linux/hrtimer.h> 48 #include <linux/hrtimer.h>
49 #include <linux/task_io_accounting.h> 49 #include <linux/task_io_accounting.h>
50 #include <linux/latencytop.h> 50 #include <linux/latencytop.h>
51 #include <linux/cred.h> 51 #include <linux/cred.h>
52 #include <linux/llist.h> 52 #include <linux/llist.h>
53 #include <linux/uidgid.h> 53 #include <linux/uidgid.h>
54 54
55 #include <asm/processor.h> 55 #include <asm/processor.h>
56 56
57 struct exec_domain; 57 struct exec_domain;
58 struct futex_pi_state; 58 struct futex_pi_state;
59 struct robust_list_head; 59 struct robust_list_head;
60 struct bio_list; 60 struct bio_list;
61 struct fs_struct; 61 struct fs_struct;
62 struct perf_event_context; 62 struct perf_event_context;
63 struct blk_plug; 63 struct blk_plug;
64 64
65 /* 65 /*
66 * List of flags we want to share for kernel threads, 66 * List of flags we want to share for kernel threads,
67 * if only because they are not used by them anyway. 67 * if only because they are not used by them anyway.
68 */ 68 */
69 #define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) 69 #define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
70 70
71 /* 71 /*
72 * These are the constant used to fake the fixed-point load-average 72 * These are the constant used to fake the fixed-point load-average
73 * counting. Some notes: 73 * counting. Some notes:
74 * - 11 bit fractions expand to 22 bits by the multiplies: this gives 74 * - 11 bit fractions expand to 22 bits by the multiplies: this gives
75 * a load-average precision of 10 bits integer + 11 bits fractional 75 * a load-average precision of 10 bits integer + 11 bits fractional
76 * - if you want to count load-averages more often, you need more 76 * - if you want to count load-averages more often, you need more
77 * precision, or rounding will get you. With 2-second counting freq, 77 * precision, or rounding will get you. With 2-second counting freq,
78 * the EXP_n values would be 1981, 2034 and 2043 if still using only 78 * the EXP_n values would be 1981, 2034 and 2043 if still using only
79 * 11 bit fractions. 79 * 11 bit fractions.
80 */ 80 */
81 extern unsigned long avenrun[]; /* Load averages */ 81 extern unsigned long avenrun[]; /* Load averages */
82 extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); 82 extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
83 83
84 #define FSHIFT 11 /* nr of bits of precision */ 84 #define FSHIFT 11 /* nr of bits of precision */
85 #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ 85 #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
86 #define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ 86 #define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */
87 #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ 87 #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
88 #define EXP_5 2014 /* 1/exp(5sec/5min) */ 88 #define EXP_5 2014 /* 1/exp(5sec/5min) */
89 #define EXP_15 2037 /* 1/exp(5sec/15min) */ 89 #define EXP_15 2037 /* 1/exp(5sec/15min) */
90 90
91 #define CALC_LOAD(load,exp,n) \ 91 #define CALC_LOAD(load,exp,n) \
92 load *= exp; \ 92 load *= exp; \
93 load += n*(FIXED_1-exp); \ 93 load += n*(FIXED_1-exp); \
94 load >>= FSHIFT; 94 load >>= FSHIFT;
95 95
96 extern unsigned long total_forks; 96 extern unsigned long total_forks;
97 extern int nr_threads; 97 extern int nr_threads;
98 DECLARE_PER_CPU(unsigned long, process_counts); 98 DECLARE_PER_CPU(unsigned long, process_counts);
99 extern int nr_processes(void); 99 extern int nr_processes(void);
100 extern unsigned long nr_running(void); 100 extern unsigned long nr_running(void);
101 extern unsigned long nr_uninterruptible(void); 101 extern unsigned long nr_uninterruptible(void);
102 extern unsigned long nr_iowait(void); 102 extern unsigned long nr_iowait(void);
103 extern unsigned long nr_iowait_cpu(int cpu); 103 extern unsigned long nr_iowait_cpu(int cpu);
104 extern unsigned long this_cpu_load(void); 104 extern unsigned long this_cpu_load(void);
105 105
106 106
107 extern void calc_global_load(unsigned long ticks); 107 extern void calc_global_load(unsigned long ticks);
108 extern void update_cpu_load_nohz(void); 108 extern void update_cpu_load_nohz(void);
109 109
110 extern unsigned long get_parent_ip(unsigned long addr); 110 extern unsigned long get_parent_ip(unsigned long addr);
111 111
112 struct seq_file; 112 struct seq_file;
113 struct cfs_rq; 113 struct cfs_rq;
114 struct task_group; 114 struct task_group;
115 #ifdef CONFIG_SCHED_DEBUG 115 #ifdef CONFIG_SCHED_DEBUG
116 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); 116 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
117 extern void proc_sched_set_task(struct task_struct *p); 117 extern void proc_sched_set_task(struct task_struct *p);
118 extern void 118 extern void
119 print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); 119 print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
120 #else 120 #else
121 static inline void 121 static inline void
122 proc_sched_show_task(struct task_struct *p, struct seq_file *m) 122 proc_sched_show_task(struct task_struct *p, struct seq_file *m)
123 { 123 {
124 } 124 }
125 static inline void proc_sched_set_task(struct task_struct *p) 125 static inline void proc_sched_set_task(struct task_struct *p)
126 { 126 {
127 } 127 }
128 static inline void 128 static inline void
129 print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) 129 print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
130 { 130 {
131 } 131 }
132 #endif 132 #endif
133 133
134 /* 134 /*
135 * Task state bitmask. NOTE! These bits are also 135 * Task state bitmask. NOTE! These bits are also
136 * encoded in fs/proc/array.c: get_task_state(). 136 * encoded in fs/proc/array.c: get_task_state().
137 * 137 *
138 * We have two separate sets of flags: task->state 138 * We have two separate sets of flags: task->state
139 * is about runnability, while task->exit_state are 139 * is about runnability, while task->exit_state are
140 * about the task exiting. Confusing, but this way 140 * about the task exiting. Confusing, but this way
141 * modifying one set can't modify the other one by 141 * modifying one set can't modify the other one by
142 * mistake. 142 * mistake.
143 */ 143 */
144 #define TASK_RUNNING 0 144 #define TASK_RUNNING 0
145 #define TASK_INTERRUPTIBLE 1 145 #define TASK_INTERRUPTIBLE 1
146 #define TASK_UNINTERRUPTIBLE 2 146 #define TASK_UNINTERRUPTIBLE 2
147 #define __TASK_STOPPED 4 147 #define __TASK_STOPPED 4
148 #define __TASK_TRACED 8 148 #define __TASK_TRACED 8
149 /* in tsk->exit_state */ 149 /* in tsk->exit_state */
150 #define EXIT_ZOMBIE 16 150 #define EXIT_ZOMBIE 16
151 #define EXIT_DEAD 32 151 #define EXIT_DEAD 32
152 /* in tsk->state again */ 152 /* in tsk->state again */
153 #define TASK_DEAD 64 153 #define TASK_DEAD 64
154 #define TASK_WAKEKILL 128 154 #define TASK_WAKEKILL 128
155 #define TASK_WAKING 256 155 #define TASK_WAKING 256
156 #define TASK_STATE_MAX 512 156 #define TASK_STATE_MAX 512
157 157
158 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" 158 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
159 159
160 extern char ___assert_task_state[1 - 2*!!( 160 extern char ___assert_task_state[1 - 2*!!(
161 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; 161 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
162 162
163 /* Convenience macros for the sake of set_task_state */ 163 /* Convenience macros for the sake of set_task_state */
164 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) 164 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
165 #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) 165 #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
166 #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) 166 #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
167 167
168 /* Convenience macros for the sake of wake_up */ 168 /* Convenience macros for the sake of wake_up */
169 #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) 169 #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
170 #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) 170 #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
171 171
172 /* get_task_state() */ 172 /* get_task_state() */
173 #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ 173 #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
174 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ 174 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
175 __TASK_TRACED) 175 __TASK_TRACED)
176 176
177 #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) 177 #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
178 #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) 178 #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
179 #define task_is_dead(task) ((task)->exit_state != 0) 179 #define task_is_dead(task) ((task)->exit_state != 0)
180 #define task_is_stopped_or_traced(task) \ 180 #define task_is_stopped_or_traced(task) \
181 ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) 181 ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
182 #define task_contributes_to_load(task) \ 182 #define task_contributes_to_load(task) \
183 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ 183 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
184 (task->flags & PF_FROZEN) == 0) 184 (task->flags & PF_FROZEN) == 0)
185 185
186 #define __set_task_state(tsk, state_value) \ 186 #define __set_task_state(tsk, state_value) \
187 do { (tsk)->state = (state_value); } while (0) 187 do { (tsk)->state = (state_value); } while (0)
188 #define set_task_state(tsk, state_value) \ 188 #define set_task_state(tsk, state_value) \
189 set_mb((tsk)->state, (state_value)) 189 set_mb((tsk)->state, (state_value))
190 190
191 /* 191 /*
192 * set_current_state() includes a barrier so that the write of current->state 192 * set_current_state() includes a barrier so that the write of current->state
193 * is correctly serialised wrt the caller's subsequent test of whether to 193 * is correctly serialised wrt the caller's subsequent test of whether to
194 * actually sleep: 194 * actually sleep:
195 * 195 *
196 * set_current_state(TASK_UNINTERRUPTIBLE); 196 * set_current_state(TASK_UNINTERRUPTIBLE);
197 * if (do_i_need_to_sleep()) 197 * if (do_i_need_to_sleep())
198 * schedule(); 198 * schedule();
199 * 199 *
200 * If the caller does not need such serialisation then use __set_current_state() 200 * If the caller does not need such serialisation then use __set_current_state()
201 */ 201 */
202 #define __set_current_state(state_value) \ 202 #define __set_current_state(state_value) \
203 do { current->state = (state_value); } while (0) 203 do { current->state = (state_value); } while (0)
204 #define set_current_state(state_value) \ 204 #define set_current_state(state_value) \
205 set_mb(current->state, (state_value)) 205 set_mb(current->state, (state_value))
206 206
207 /* Task command name length */ 207 /* Task command name length */
208 #define TASK_COMM_LEN 16 208 #define TASK_COMM_LEN 16
209 209
210 #include <linux/spinlock.h> 210 #include <linux/spinlock.h>
211 211
212 /* 212 /*
213 * This serializes "schedule()" and also protects 213 * This serializes "schedule()" and also protects
214 * the run-queue from deletions/modifications (but 214 * the run-queue from deletions/modifications (but
215 * _adding_ to the beginning of the run-queue has 215 * _adding_ to the beginning of the run-queue has
216 * a separate lock). 216 * a separate lock).
217 */ 217 */
218 extern rwlock_t tasklist_lock; 218 extern rwlock_t tasklist_lock;
219 extern spinlock_t mmlist_lock; 219 extern spinlock_t mmlist_lock;
220 220
221 struct task_struct; 221 struct task_struct;
222 222
223 #ifdef CONFIG_PROVE_RCU 223 #ifdef CONFIG_PROVE_RCU
224 extern int lockdep_tasklist_lock_is_held(void); 224 extern int lockdep_tasklist_lock_is_held(void);
225 #endif /* #ifdef CONFIG_PROVE_RCU */ 225 #endif /* #ifdef CONFIG_PROVE_RCU */
226 226
227 extern void sched_init(void); 227 extern void sched_init(void);
228 extern void sched_init_smp(void); 228 extern void sched_init_smp(void);
229 extern asmlinkage void schedule_tail(struct task_struct *prev); 229 extern asmlinkage void schedule_tail(struct task_struct *prev);
230 extern void init_idle(struct task_struct *idle, int cpu); 230 extern void init_idle(struct task_struct *idle, int cpu);
231 extern void init_idle_bootup_task(struct task_struct *idle); 231 extern void init_idle_bootup_task(struct task_struct *idle);
232 232
233 extern int runqueue_is_locked(int cpu); 233 extern int runqueue_is_locked(int cpu);
234 234
235 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 235 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
236 extern void nohz_balance_enter_idle(int cpu); 236 extern void nohz_balance_enter_idle(int cpu);
237 extern void set_cpu_sd_state_idle(void); 237 extern void set_cpu_sd_state_idle(void);
238 extern int get_nohz_timer_target(void); 238 extern int get_nohz_timer_target(void);
239 #else 239 #else
240 static inline void nohz_balance_enter_idle(int cpu) { } 240 static inline void nohz_balance_enter_idle(int cpu) { }
241 static inline void set_cpu_sd_state_idle(void) { } 241 static inline void set_cpu_sd_state_idle(void) { }
242 #endif 242 #endif
243 243
244 /* 244 /*
245 * Only dump TASK_* tasks. (0 for all tasks) 245 * Only dump TASK_* tasks. (0 for all tasks)
246 */ 246 */
247 extern void show_state_filter(unsigned long state_filter); 247 extern void show_state_filter(unsigned long state_filter);
248 248
249 static inline void show_state(void) 249 static inline void show_state(void)
250 { 250 {
251 show_state_filter(0); 251 show_state_filter(0);
252 } 252 }
253 253
254 extern void show_regs(struct pt_regs *); 254 extern void show_regs(struct pt_regs *);
255 255
256 /* 256 /*
257 * TASK is a pointer to the task whose backtrace we want to see (or NULL for current 257 * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
258 * task), SP is the stack pointer of the first frame that should be shown in the back 258 * task), SP is the stack pointer of the first frame that should be shown in the back
259 * trace (or NULL if the entire call-chain of the task should be shown). 259 * trace (or NULL if the entire call-chain of the task should be shown).
260 */ 260 */
261 extern void show_stack(struct task_struct *task, unsigned long *sp); 261 extern void show_stack(struct task_struct *task, unsigned long *sp);
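Following the convention above, a call such as the one below (a usage sketch, not taken from this file) dumps the full call-chain of the current task:

	show_stack(NULL, NULL);		/* current task, entire call-chain */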
262 262
263 void io_schedule(void); 263 void io_schedule(void);
264 long io_schedule_timeout(long timeout); 264 long io_schedule_timeout(long timeout);
265 265
266 extern void cpu_init (void); 266 extern void cpu_init (void);
267 extern void trap_init(void); 267 extern void trap_init(void);
268 extern void update_process_times(int user); 268 extern void update_process_times(int user);
269 extern void scheduler_tick(void); 269 extern void scheduler_tick(void);
270 270
271 extern void sched_show_task(struct task_struct *p); 271 extern void sched_show_task(struct task_struct *p);
272 272
273 #ifdef CONFIG_LOCKUP_DETECTOR 273 #ifdef CONFIG_LOCKUP_DETECTOR
274 extern void touch_softlockup_watchdog(void); 274 extern void touch_softlockup_watchdog(void);
275 extern void touch_softlockup_watchdog_sync(void); 275 extern void touch_softlockup_watchdog_sync(void);
276 extern void touch_all_softlockup_watchdogs(void); 276 extern void touch_all_softlockup_watchdogs(void);
277 extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, 277 extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
278 void __user *buffer, 278 void __user *buffer,
279 size_t *lenp, loff_t *ppos); 279 size_t *lenp, loff_t *ppos);
280 extern unsigned int softlockup_panic; 280 extern unsigned int softlockup_panic;
281 void lockup_detector_init(void); 281 void lockup_detector_init(void);
282 #else 282 #else
283 static inline void touch_softlockup_watchdog(void) 283 static inline void touch_softlockup_watchdog(void)
284 { 284 {
285 } 285 }
286 static inline void touch_softlockup_watchdog_sync(void) 286 static inline void touch_softlockup_watchdog_sync(void)
287 { 287 {
288 } 288 }
289 static inline void touch_all_softlockup_watchdogs(void) 289 static inline void touch_all_softlockup_watchdogs(void)
290 { 290 {
291 } 291 }
292 static inline void lockup_detector_init(void) 292 static inline void lockup_detector_init(void)
293 { 293 {
294 } 294 }
295 #endif 295 #endif
296 296
297 #ifdef CONFIG_DETECT_HUNG_TASK 297 #ifdef CONFIG_DETECT_HUNG_TASK
298 extern unsigned int sysctl_hung_task_panic; 298 extern unsigned int sysctl_hung_task_panic;
299 extern unsigned long sysctl_hung_task_check_count; 299 extern unsigned long sysctl_hung_task_check_count;
300 extern unsigned long sysctl_hung_task_timeout_secs; 300 extern unsigned long sysctl_hung_task_timeout_secs;
301 extern unsigned long sysctl_hung_task_warnings; 301 extern unsigned long sysctl_hung_task_warnings;
302 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, 302 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
303 void __user *buffer, 303 void __user *buffer,
304 size_t *lenp, loff_t *ppos); 304 size_t *lenp, loff_t *ppos);
305 #else 305 #else
306 /* Avoid need for ifdefs elsewhere in the code */ 306 /* Avoid need for ifdefs elsewhere in the code */
307 enum { sysctl_hung_task_timeout_secs = 0 }; 307 enum { sysctl_hung_task_timeout_secs = 0 };
308 #endif 308 #endif
309 309
310 /* Attach to any functions which should be ignored in wchan output. */ 310 /* Attach to any functions which should be ignored in wchan output. */
311 #define __sched __attribute__((__section__(".sched.text"))) 311 #define __sched __attribute__((__section__(".sched.text")))
312 312
313 /* Linker adds these: start and end of __sched functions */ 313 /* Linker adds these: start and end of __sched functions */
314 extern char __sched_text_start[], __sched_text_end[]; 314 extern char __sched_text_start[], __sched_text_end[];
315 315
316 /* Is this address in the __sched functions? */ 316 /* Is this address in the __sched functions? */
317 extern int in_sched_functions(unsigned long addr); 317 extern int in_sched_functions(unsigned long addr);
318 318
319 #define MAX_SCHEDULE_TIMEOUT LONG_MAX 319 #define MAX_SCHEDULE_TIMEOUT LONG_MAX
320 extern signed long schedule_timeout(signed long timeout); 320 extern signed long schedule_timeout(signed long timeout);
321 extern signed long schedule_timeout_interruptible(signed long timeout); 321 extern signed long schedule_timeout_interruptible(signed long timeout);
322 extern signed long schedule_timeout_killable(signed long timeout); 322 extern signed long schedule_timeout_killable(signed long timeout);
323 extern signed long schedule_timeout_uninterruptible(signed long timeout); 323 extern signed long schedule_timeout_uninterruptible(signed long timeout);
324 asmlinkage void schedule(void); 324 asmlinkage void schedule(void);
325 extern void schedule_preempt_disabled(void); 325 extern void schedule_preempt_disabled(void);
326 extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); 326 extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
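A minimal sketch of the usual schedule_timeout() pattern (illustrative only; the caller is expected to set the task state first, otherwise the routine does not actually sleep):

	signed long remaining;

	set_current_state(TASK_INTERRUPTIBLE);
	remaining = schedule_timeout(msecs_to_jiffies(100));	/* sleep for up to ~100ms */
	if (remaining)
		pr_debug("woken %ld jiffies early\n", remaining);	/* e.g. by a signal */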
327 327
328 struct nsproxy; 328 struct nsproxy;
329 struct user_namespace; 329 struct user_namespace;
330 330
331 /* 331 /*
332 * Default maximum number of active map areas, this limits the number of vmas 332 * Default maximum number of active map areas, this limits the number of vmas
333 * per mm struct. Users can overwrite this number by sysctl but there is a 333 * per mm struct. Users can overwrite this number by sysctl but there is a
334 * problem. 334 * problem.
335 * 335 *
336 * When a program's coredump is generated in ELF format, a section is created 336 * When a program's coredump is generated in ELF format, a section is created
337 * per vma. In ELF, the number of sections is represented by an unsigned short. 337 * per vma. In ELF, the number of sections is represented by an unsigned short.
338 * This means the number of sections must be smaller than 65535 at coredump time. 338 * This means the number of sections must be smaller than 65535 at coredump time.
339 * Because the kernel adds some informative sections to the image of a program when 339 * Because the kernel adds some informative sections to the image of a program when
340 * generating a coredump, we need some margin. The number of extra sections is 340 * generating a coredump, we need some margin. The number of extra sections is
341 * currently 1-3 and depends on the arch. We use "5" as a safe margin here. 341 * currently 1-3 and depends on the arch. We use "5" as a safe margin here.
342 */ 342 */
343 #define MAPCOUNT_ELF_CORE_MARGIN (5) 343 #define MAPCOUNT_ELF_CORE_MARGIN (5)
344 #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) 344 #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
345 345
346 extern int sysctl_max_map_count; 346 extern int sysctl_max_map_count;
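A quick worked value for the defines above (assuming the usual USHRT_MAX of 65535):

	DEFAULT_MAX_MAP_COUNT = USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN
	                      = 65535 - 5
	                      = 65530		/* overridable through sysctl_max_map_count */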
347 347
348 #include <linux/aio.h> 348 #include <linux/aio.h>
349 349
350 #ifdef CONFIG_MMU 350 #ifdef CONFIG_MMU
351 extern void arch_pick_mmap_layout(struct mm_struct *mm); 351 extern void arch_pick_mmap_layout(struct mm_struct *mm);
352 extern unsigned long 352 extern unsigned long
353 arch_get_unmapped_area(struct file *, unsigned long, unsigned long, 353 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
354 unsigned long, unsigned long); 354 unsigned long, unsigned long);
355 extern unsigned long 355 extern unsigned long
356 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, 356 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
357 unsigned long len, unsigned long pgoff, 357 unsigned long len, unsigned long pgoff,
358 unsigned long flags); 358 unsigned long flags);
359 extern void arch_unmap_area(struct mm_struct *, unsigned long); 359 extern void arch_unmap_area(struct mm_struct *, unsigned long);
360 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); 360 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
361 #else 361 #else
362 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} 362 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
363 #endif 363 #endif
364 364
365 365
366 extern void set_dumpable(struct mm_struct *mm, int value); 366 extern void set_dumpable(struct mm_struct *mm, int value);
367 extern int get_dumpable(struct mm_struct *mm); 367 extern int get_dumpable(struct mm_struct *mm);
368 368
369 /* get/set_dumpable() values */ 369 /* get/set_dumpable() values */
370 #define SUID_DUMPABLE_DISABLED 0 370 #define SUID_DUMPABLE_DISABLED 0
371 #define SUID_DUMPABLE_ENABLED 1 371 #define SUID_DUMPABLE_ENABLED 1
372 #define SUID_DUMPABLE_SAFE 2 372 #define SUID_DUMPABLE_SAFE 2
373 373
374 /* mm flags */ 374 /* mm flags */
375 /* dumpable bits */ 375 /* dumpable bits */
376 #define MMF_DUMPABLE 0 /* core dump is permitted */ 376 #define MMF_DUMPABLE 0 /* core dump is permitted */
377 #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ 377 #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
378 378
379 #define MMF_DUMPABLE_BITS 2 379 #define MMF_DUMPABLE_BITS 2
380 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) 380 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
381 381
382 /* coredump filter bits */ 382 /* coredump filter bits */
383 #define MMF_DUMP_ANON_PRIVATE 2 383 #define MMF_DUMP_ANON_PRIVATE 2
384 #define MMF_DUMP_ANON_SHARED 3 384 #define MMF_DUMP_ANON_SHARED 3
385 #define MMF_DUMP_MAPPED_PRIVATE 4 385 #define MMF_DUMP_MAPPED_PRIVATE 4
386 #define MMF_DUMP_MAPPED_SHARED 5 386 #define MMF_DUMP_MAPPED_SHARED 5
387 #define MMF_DUMP_ELF_HEADERS 6 387 #define MMF_DUMP_ELF_HEADERS 6
388 #define MMF_DUMP_HUGETLB_PRIVATE 7 388 #define MMF_DUMP_HUGETLB_PRIVATE 7
389 #define MMF_DUMP_HUGETLB_SHARED 8 389 #define MMF_DUMP_HUGETLB_SHARED 8
390 390
391 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS 391 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
392 #define MMF_DUMP_FILTER_BITS 7 392 #define MMF_DUMP_FILTER_BITS 7
393 #define MMF_DUMP_FILTER_MASK \ 393 #define MMF_DUMP_FILTER_MASK \
394 (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) 394 (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
395 #define MMF_DUMP_FILTER_DEFAULT \ 395 #define MMF_DUMP_FILTER_DEFAULT \
396 ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ 396 ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
397 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) 397 (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
398 398
399 #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS 399 #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
400 # define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) 400 # define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS)
401 #else 401 #else
402 # define MMF_DUMP_MASK_DEFAULT_ELF 0 402 # define MMF_DUMP_MASK_DEFAULT_ELF 0
403 #endif 403 #endif
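Putting the filter bits together (a worked example, assuming CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y; the value user space reads from /proc/<pid>/coredump_filter is this mm-flags value shifted down by MMF_DUMP_FILTER_SHIFT):

	MMF_DUMP_FILTER_DEFAULT = (1 << 2) | (1 << 3) | (1 << 7) | (1 << 6) = 0xcc
	/* >> MMF_DUMP_FILTER_SHIFT (2)  =>  0x33 as seen in /proc/<pid>/coredump_filter */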
404 /* leave room for more dump flags */ 404 /* leave room for more dump flags */
405 #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ 405 #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
406 #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ 406 #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
407 #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ 407 #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
408 408
409 #define MMF_HAS_UPROBES 19 /* has uprobes */ 409 #define MMF_HAS_UPROBES 19 /* has uprobes */
410 #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ 410 #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
411 411
412 #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) 412 #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
413 413
414 struct sighand_struct { 414 struct sighand_struct {
415 atomic_t count; 415 atomic_t count;
416 struct k_sigaction action[_NSIG]; 416 struct k_sigaction action[_NSIG];
417 spinlock_t siglock; 417 spinlock_t siglock;
418 wait_queue_head_t signalfd_wqh; 418 wait_queue_head_t signalfd_wqh;
419 }; 419 };
420 420
421 struct pacct_struct { 421 struct pacct_struct {
422 int ac_flag; 422 int ac_flag;
423 long ac_exitcode; 423 long ac_exitcode;
424 unsigned long ac_mem; 424 unsigned long ac_mem;
425 cputime_t ac_utime, ac_stime; 425 cputime_t ac_utime, ac_stime;
426 unsigned long ac_minflt, ac_majflt; 426 unsigned long ac_minflt, ac_majflt;
427 }; 427 };
428 428
429 struct cpu_itimer { 429 struct cpu_itimer {
430 cputime_t expires; 430 cputime_t expires;
431 cputime_t incr; 431 cputime_t incr;
432 u32 error; 432 u32 error;
433 u32 incr_error; 433 u32 incr_error;
434 }; 434 };
435 435
436 /** 436 /**
437 * struct task_cputime - collected CPU time counts 437 * struct task_cputime - collected CPU time counts
438 * @utime: time spent in user mode, in &cputime_t units 438 * @utime: time spent in user mode, in &cputime_t units
439 * @stime: time spent in kernel mode, in &cputime_t units 439 * @stime: time spent in kernel mode, in &cputime_t units
440 * @sum_exec_runtime: total time spent on the CPU, in nanoseconds 440 * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
441 * 441 *
442 * This structure groups together three kinds of CPU time that are 442 * This structure groups together three kinds of CPU time that are
443 * tracked for threads and thread groups. Most things considering 443 * tracked for threads and thread groups. Most things considering
444 * CPU time want to group these counts together and treat all three 444 * CPU time want to group these counts together and treat all three
445 * of them in parallel. 445 * of them in parallel.
446 */ 446 */
447 struct task_cputime { 447 struct task_cputime {
448 cputime_t utime; 448 cputime_t utime;
449 cputime_t stime; 449 cputime_t stime;
450 unsigned long long sum_exec_runtime; 450 unsigned long long sum_exec_runtime;
451 }; 451 };
452 /* Alternate field names when used to cache expirations. */ 452 /* Alternate field names when used to cache expirations. */
453 #define prof_exp stime 453 #define prof_exp stime
454 #define virt_exp utime 454 #define virt_exp utime
455 #define sched_exp sum_exec_runtime 455 #define sched_exp sum_exec_runtime
456 456
457 #define INIT_CPUTIME \ 457 #define INIT_CPUTIME \
458 (struct task_cputime) { \ 458 (struct task_cputime) { \
459 .utime = 0, \ 459 .utime = 0, \
460 .stime = 0, \ 460 .stime = 0, \
461 .sum_exec_runtime = 0, \ 461 .sum_exec_runtime = 0, \
462 } 462 }
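For context, a hedged sketch of how these totals are typically consumed ('tsk' is assumed to point at the thread-group leader of interest; thread_group_cputime_adjusted() is the API renamed by this commit and returns the scaled, monotonic view of the same data):

	struct task_cputime times;
	cputime_t utime, stime;

	thread_group_cputime(tsk, &times);			/* raw group totals */
	thread_group_cputime_adjusted(tsk, &utime, &stime);	/* scaled and bounded */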
463 463
464 /* 464 /*
465 * Disable preemption until the scheduler is running. 465 * Disable preemption until the scheduler is running.
466 * Reset by start_kernel()->sched_init()->init_idle(). 466 * Reset by start_kernel()->sched_init()->init_idle().
467 * 467 *
468 * We include PREEMPT_ACTIVE to avoid cond_resched() from working 468 * We include PREEMPT_ACTIVE to avoid cond_resched() from working
469 * before the scheduler is active -- see should_resched(). 469 * before the scheduler is active -- see should_resched().
470 */ 470 */
471 #define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) 471 #define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE)
472 472
473 /** 473 /**
474 * struct thread_group_cputimer - thread group interval timer counts 474 * struct thread_group_cputimer - thread group interval timer counts
475 * @cputime: thread group interval timers. 475 * @cputime: thread group interval timers.
476 * @running: non-zero when there are timers running and 476 * @running: non-zero when there are timers running and
477 * @cputime receives updates. 477 * @cputime receives updates.
478 * @lock: lock for fields in this struct. 478 * @lock: lock for fields in this struct.
479 * 479 *
480 * This structure contains the version of task_cputime, above, that is 480 * This structure contains the version of task_cputime, above, that is
481 * used for thread group CPU timer calculations. 481 * used for thread group CPU timer calculations.
482 */ 482 */
483 struct thread_group_cputimer { 483 struct thread_group_cputimer {
484 struct task_cputime cputime; 484 struct task_cputime cputime;
485 int running; 485 int running;
486 raw_spinlock_t lock; 486 raw_spinlock_t lock;
487 }; 487 };
488 488
489 #include <linux/rwsem.h> 489 #include <linux/rwsem.h>
490 struct autogroup; 490 struct autogroup;
491 491
492 /* 492 /*
493 * NOTE! "signal_struct" does not have its own 493 * NOTE! "signal_struct" does not have its own
494 * locking, because a shared signal_struct always 494 * locking, because a shared signal_struct always
495 * implies a shared sighand_struct, so locking 495 * implies a shared sighand_struct, so locking
496 * sighand_struct is always a proper superset of 496 * sighand_struct is always a proper superset of
497 * the locking of signal_struct. 497 * the locking of signal_struct.
498 */ 498 */
499 struct signal_struct { 499 struct signal_struct {
500 atomic_t sigcnt; 500 atomic_t sigcnt;
501 atomic_t live; 501 atomic_t live;
502 int nr_threads; 502 int nr_threads;
503 503
504 wait_queue_head_t wait_chldexit; /* for wait4() */ 504 wait_queue_head_t wait_chldexit; /* for wait4() */
505 505
506 /* current thread group signal load-balancing target: */ 506 /* current thread group signal load-balancing target: */
507 struct task_struct *curr_target; 507 struct task_struct *curr_target;
508 508
509 /* shared signal handling: */ 509 /* shared signal handling: */
510 struct sigpending shared_pending; 510 struct sigpending shared_pending;
511 511
512 /* thread group exit support */ 512 /* thread group exit support */
513 int group_exit_code; 513 int group_exit_code;
514 /* overloaded: 514 /* overloaded:
515 * - notify group_exit_task when ->count is equal to notify_count 515 * - notify group_exit_task when ->count is equal to notify_count
516 * - everyone except group_exit_task is stopped during signal delivery 516 * - everyone except group_exit_task is stopped during signal delivery
517 * of fatal signals, group_exit_task processes the signal. 517 * of fatal signals, group_exit_task processes the signal.
518 */ 518 */
519 int notify_count; 519 int notify_count;
520 struct task_struct *group_exit_task; 520 struct task_struct *group_exit_task;
521 521
522 /* thread group stop support, overloads group_exit_code too */ 522 /* thread group stop support, overloads group_exit_code too */
523 int group_stop_count; 523 int group_stop_count;
524 unsigned int flags; /* see SIGNAL_* flags below */ 524 unsigned int flags; /* see SIGNAL_* flags below */
525 525
526 /* 526 /*
527 * PR_SET_CHILD_SUBREAPER marks a process, like a service 527 * PR_SET_CHILD_SUBREAPER marks a process, like a service
528 * manager, to re-parent orphan (double-forking) child processes 528 * manager, to re-parent orphan (double-forking) child processes
529 * to this process instead of 'init'. The service manager is 529 * to this process instead of 'init'. The service manager is
530 * able to receive SIGCHLD signals and is able to investigate 530 * able to receive SIGCHLD signals and is able to investigate
531 * the process until it calls wait(). All children of this 531 * the process until it calls wait(). All children of this
532 * process will inherit a flag if they should look for a 532 * process will inherit a flag if they should look for a
533 * child_subreaper process at exit. 533 * child_subreaper process at exit.
534 */ 534 */
535 unsigned int is_child_subreaper:1; 535 unsigned int is_child_subreaper:1;
536 unsigned int has_child_subreaper:1; 536 unsigned int has_child_subreaper:1;
537 537
538 /* POSIX.1b Interval Timers */ 538 /* POSIX.1b Interval Timers */
539 struct list_head posix_timers; 539 struct list_head posix_timers;
540 540
541 /* ITIMER_REAL timer for the process */ 541 /* ITIMER_REAL timer for the process */
542 struct hrtimer real_timer; 542 struct hrtimer real_timer;
543 struct pid *leader_pid; 543 struct pid *leader_pid;
544 ktime_t it_real_incr; 544 ktime_t it_real_incr;
545 545
546 /* 546 /*
547 * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use 547 * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
548 * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these 548 * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
549 * values are defined to 0 and 1 respectively 549 * values are defined to 0 and 1 respectively
550 */ 550 */
551 struct cpu_itimer it[2]; 551 struct cpu_itimer it[2];
552 552
553 /* 553 /*
554 * Thread group totals for process CPU timers. 554 * Thread group totals for process CPU timers.
555 * See thread_group_cputimer(), et al, for details. 555 * See thread_group_cputimer(), et al, for details.
556 */ 556 */
557 struct thread_group_cputimer cputimer; 557 struct thread_group_cputimer cputimer;
558 558
559 /* Earliest-expiration cache. */ 559 /* Earliest-expiration cache. */
560 struct task_cputime cputime_expires; 560 struct task_cputime cputime_expires;
561 561
562 struct list_head cpu_timers[3]; 562 struct list_head cpu_timers[3];
563 563
564 struct pid *tty_old_pgrp; 564 struct pid *tty_old_pgrp;
565 565
566 /* boolean value for session group leader */ 566 /* boolean value for session group leader */
567 int leader; 567 int leader;
568 568
569 struct tty_struct *tty; /* NULL if no tty */ 569 struct tty_struct *tty; /* NULL if no tty */
570 570
571 #ifdef CONFIG_SCHED_AUTOGROUP 571 #ifdef CONFIG_SCHED_AUTOGROUP
572 struct autogroup *autogroup; 572 struct autogroup *autogroup;
573 #endif 573 #endif
574 /* 574 /*
575 * Cumulative resource counters for dead threads in the group, 575 * Cumulative resource counters for dead threads in the group,
576 * and for reaped dead child processes forked by this group. 576 * and for reaped dead child processes forked by this group.
577 * Live threads maintain their own counters and add to these 577 * Live threads maintain their own counters and add to these
578 * in __exit_signal, except for the group leader. 578 * in __exit_signal, except for the group leader.
579 */ 579 */
580 cputime_t utime, stime, cutime, cstime; 580 cputime_t utime, stime, cutime, cstime;
581 cputime_t gtime; 581 cputime_t gtime;
582 cputime_t cgtime; 582 cputime_t cgtime;
583 #ifndef CONFIG_VIRT_CPU_ACCOUNTING 583 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
584 cputime_t prev_utime, prev_stime; 584 cputime_t prev_utime, prev_stime;
585 #endif 585 #endif
586 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; 586 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
587 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; 587 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
588 unsigned long inblock, oublock, cinblock, coublock; 588 unsigned long inblock, oublock, cinblock, coublock;
589 unsigned long maxrss, cmaxrss; 589 unsigned long maxrss, cmaxrss;
590 struct task_io_accounting ioac; 590 struct task_io_accounting ioac;
591 591
592 /* 592 /*
593 * Cumulative ns of scheduled CPU time for dead threads in the 593 * Cumulative ns of scheduled CPU time for dead threads in the
594 * group, not including a zombie group leader. (This only differs 594 * group, not including a zombie group leader. (This only differs
595 * from jiffies_to_ns(utime + stime) if sched_clock uses something 595 * from jiffies_to_ns(utime + stime) if sched_clock uses something
596 * other than jiffies.) 596 * other than jiffies.)
597 */ 597 */
598 unsigned long long sum_sched_runtime; 598 unsigned long long sum_sched_runtime;
599 599
600 /* 600 /*
601 * We don't bother to synchronize most readers of this at all, 601 * We don't bother to synchronize most readers of this at all,
602 * because there is no reader checking a limit that actually needs 602 * because there is no reader checking a limit that actually needs
603 * to get both rlim_cur and rlim_max atomically, and either one 603 * to get both rlim_cur and rlim_max atomically, and either one
604 * alone is a single word that can safely be read normally. 604 * alone is a single word that can safely be read normally.
605 * getrlimit/setrlimit use task_lock(current->group_leader) to 605 * getrlimit/setrlimit use task_lock(current->group_leader) to
606 * protect this instead of the siglock, because they really 606 * protect this instead of the siglock, because they really
607 * have no need to disable irqs. 607 * have no need to disable irqs.
608 */ 608 */
609 struct rlimit rlim[RLIM_NLIMITS]; 609 struct rlimit rlim[RLIM_NLIMITS];
610 610
611 #ifdef CONFIG_BSD_PROCESS_ACCT 611 #ifdef CONFIG_BSD_PROCESS_ACCT
612 struct pacct_struct pacct; /* per-process accounting information */ 612 struct pacct_struct pacct; /* per-process accounting information */
613 #endif 613 #endif
614 #ifdef CONFIG_TASKSTATS 614 #ifdef CONFIG_TASKSTATS
615 struct taskstats *stats; 615 struct taskstats *stats;
616 #endif 616 #endif
617 #ifdef CONFIG_AUDIT 617 #ifdef CONFIG_AUDIT
618 unsigned audit_tty; 618 unsigned audit_tty;
619 struct tty_audit_buf *tty_audit_buf; 619 struct tty_audit_buf *tty_audit_buf;
620 #endif 620 #endif
621 #ifdef CONFIG_CGROUPS 621 #ifdef CONFIG_CGROUPS
622 /* 622 /*
623 * group_rwsem prevents new tasks from entering the threadgroup and 623 * group_rwsem prevents new tasks from entering the threadgroup and
624 * member tasks from exiting, more specifically, the setting of 624 * member tasks from exiting, more specifically, the setting of
625 * PF_EXITING. fork and exit paths are protected with this rwsem 625 * PF_EXITING. fork and exit paths are protected with this rwsem
626 * using threadgroup_change_begin/end(). Users which require 626 * using threadgroup_change_begin/end(). Users which require
627 * threadgroup to remain stable should use threadgroup_[un]lock() 627 * threadgroup to remain stable should use threadgroup_[un]lock()
628 * which also takes care of exec path. Currently, cgroup is the 628 * which also takes care of exec path. Currently, cgroup is the
629 * only user. 629 * only user.
630 */ 630 */
631 struct rw_semaphore group_rwsem; 631 struct rw_semaphore group_rwsem;
632 #endif 632 #endif
633 633
634 int oom_score_adj; /* OOM kill score adjustment */ 634 int oom_score_adj; /* OOM kill score adjustment */
635 int oom_score_adj_min; /* OOM kill score adjustment minimum value. 635 int oom_score_adj_min; /* OOM kill score adjustment minimum value.
636 * Only settable by CAP_SYS_RESOURCE. */ 636 * Only settable by CAP_SYS_RESOURCE. */
637 637
638 struct mutex cred_guard_mutex; /* guard against foreign influences on 638 struct mutex cred_guard_mutex; /* guard against foreign influences on
639 * credential calculations 639 * credential calculations
640 * (notably ptrace) */ 640 * (notably ptrace) */
641 }; 641 };
642 642
643 /* 643 /*
644 * Bits in flags field of signal_struct. 644 * Bits in flags field of signal_struct.
645 */ 645 */
646 #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ 646 #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */
647 #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ 647 #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
648 #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ 648 #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */
649 /* 649 /*
650 * Pending notifications to parent. 650 * Pending notifications to parent.
651 */ 651 */
652 #define SIGNAL_CLD_STOPPED 0x00000010 652 #define SIGNAL_CLD_STOPPED 0x00000010
653 #define SIGNAL_CLD_CONTINUED 0x00000020 653 #define SIGNAL_CLD_CONTINUED 0x00000020
654 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) 654 #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
655 655
656 #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ 656 #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
657 657
658 /* If true, all threads except ->group_exit_task have pending SIGKILL */ 658 /* If true, all threads except ->group_exit_task have pending SIGKILL */
659 static inline int signal_group_exit(const struct signal_struct *sig) 659 static inline int signal_group_exit(const struct signal_struct *sig)
660 { 660 {
661 return (sig->flags & SIGNAL_GROUP_EXIT) || 661 return (sig->flags & SIGNAL_GROUP_EXIT) ||
662 (sig->group_exit_task != NULL); 662 (sig->group_exit_task != NULL);
663 } 663 }
664 664
665 /* 665 /*
666 * Some day this will be a full-fledged user tracking system.. 666 * Some day this will be a full-fledged user tracking system..
667 */ 667 */
668 struct user_struct { 668 struct user_struct {
669 atomic_t __count; /* reference count */ 669 atomic_t __count; /* reference count */
670 atomic_t processes; /* How many processes does this user have? */ 670 atomic_t processes; /* How many processes does this user have? */
671 atomic_t files; /* How many open files does this user have? */ 671 atomic_t files; /* How many open files does this user have? */
672 atomic_t sigpending; /* How many pending signals does this user have? */ 672 atomic_t sigpending; /* How many pending signals does this user have? */
673 #ifdef CONFIG_INOTIFY_USER 673 #ifdef CONFIG_INOTIFY_USER
674 atomic_t inotify_watches; /* How many inotify watches does this user have? */ 674 atomic_t inotify_watches; /* How many inotify watches does this user have? */
675 atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ 675 atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
676 #endif 676 #endif
677 #ifdef CONFIG_FANOTIFY 677 #ifdef CONFIG_FANOTIFY
678 atomic_t fanotify_listeners; 678 atomic_t fanotify_listeners;
679 #endif 679 #endif
680 #ifdef CONFIG_EPOLL 680 #ifdef CONFIG_EPOLL
681 atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ 681 atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
682 #endif 682 #endif
683 #ifdef CONFIG_POSIX_MQUEUE 683 #ifdef CONFIG_POSIX_MQUEUE
684 /* protected by mq_lock */ 684 /* protected by mq_lock */
685 unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ 685 unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
686 #endif 686 #endif
687 unsigned long locked_shm; /* How many pages of mlocked shm ? */ 687 unsigned long locked_shm; /* How many pages of mlocked shm ? */
688 688
689 #ifdef CONFIG_KEYS 689 #ifdef CONFIG_KEYS
690 struct key *uid_keyring; /* UID specific keyring */ 690 struct key *uid_keyring; /* UID specific keyring */
691 struct key *session_keyring; /* UID's default session keyring */ 691 struct key *session_keyring; /* UID's default session keyring */
692 #endif 692 #endif
693 693
694 /* Hash table maintenance information */ 694 /* Hash table maintenance information */
695 struct hlist_node uidhash_node; 695 struct hlist_node uidhash_node;
696 kuid_t uid; 696 kuid_t uid;
697 697
698 #ifdef CONFIG_PERF_EVENTS 698 #ifdef CONFIG_PERF_EVENTS
699 atomic_long_t locked_vm; 699 atomic_long_t locked_vm;
700 #endif 700 #endif
701 }; 701 };
702 702
703 extern int uids_sysfs_init(void); 703 extern int uids_sysfs_init(void);
704 704
705 extern struct user_struct *find_user(kuid_t); 705 extern struct user_struct *find_user(kuid_t);
706 706
707 extern struct user_struct root_user; 707 extern struct user_struct root_user;
708 #define INIT_USER (&root_user) 708 #define INIT_USER (&root_user)
709 709
710 710
711 struct backing_dev_info; 711 struct backing_dev_info;
712 struct reclaim_state; 712 struct reclaim_state;
713 713
714 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 714 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
715 struct sched_info { 715 struct sched_info {
716 /* cumulative counters */ 716 /* cumulative counters */
717 unsigned long pcount; /* # of times run on this cpu */ 717 unsigned long pcount; /* # of times run on this cpu */
718 unsigned long long run_delay; /* time spent waiting on a runqueue */ 718 unsigned long long run_delay; /* time spent waiting on a runqueue */
719 719
720 /* timestamps */ 720 /* timestamps */
721 unsigned long long last_arrival,/* when we last ran on a cpu */ 721 unsigned long long last_arrival,/* when we last ran on a cpu */
722 last_queued; /* when we were last queued to run */ 722 last_queued; /* when we were last queued to run */
723 }; 723 };
724 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ 724 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
725 725
726 #ifdef CONFIG_TASK_DELAY_ACCT 726 #ifdef CONFIG_TASK_DELAY_ACCT
727 struct task_delay_info { 727 struct task_delay_info {
728 spinlock_t lock; 728 spinlock_t lock;
729 unsigned int flags; /* Private per-task flags */ 729 unsigned int flags; /* Private per-task flags */
730 730
731 /* For each stat XXX, add the following, aligned appropriately 731 /* For each stat XXX, add the following, aligned appropriately
732 * 732 *
733 * struct timespec XXX_start, XXX_end; 733 * struct timespec XXX_start, XXX_end;
734 * u64 XXX_delay; 734 * u64 XXX_delay;
735 * u32 XXX_count; 735 * u32 XXX_count;
736 * 736 *
737 * Atomicity of updates to XXX_delay, XXX_count protected by 737 * Atomicity of updates to XXX_delay, XXX_count protected by
738 * single lock above (split into XXX_lock if contention is an issue). 738 * single lock above (split into XXX_lock if contention is an issue).
739 */ 739 */
740 740
741 /* 741 /*
742 * XXX_count is incremented on every XXX operation, the delay 742 * XXX_count is incremented on every XXX operation, the delay
743 * associated with the operation is added to XXX_delay. 743 * associated with the operation is added to XXX_delay.
744 * XXX_delay contains the accumulated delay time in nanoseconds. 744 * XXX_delay contains the accumulated delay time in nanoseconds.
745 */ 745 */
746 struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ 746 struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */
747 u64 blkio_delay; /* wait for sync block io completion */ 747 u64 blkio_delay; /* wait for sync block io completion */
748 u64 swapin_delay; /* wait for swapin block io completion */ 748 u64 swapin_delay; /* wait for swapin block io completion */
749 u32 blkio_count; /* total count of the number of sync block */ 749 u32 blkio_count; /* total count of the number of sync block */
750 /* io operations performed */ 750 /* io operations performed */
751 u32 swapin_count; /* total count of the number of swapin block */ 751 u32 swapin_count; /* total count of the number of swapin block */
752 /* io operations performed */ 752 /* io operations performed */
753 753
754 struct timespec freepages_start, freepages_end; 754 struct timespec freepages_start, freepages_end;
755 u64 freepages_delay; /* wait for memory reclaim */ 755 u64 freepages_delay; /* wait for memory reclaim */
756 u32 freepages_count; /* total count of memory reclaim */ 756 u32 freepages_count; /* total count of memory reclaim */
757 }; 757 };
758 #endif /* CONFIG_TASK_DELAY_ACCT */ 758 #endif /* CONFIG_TASK_DELAY_ACCT */
759 759
760 static inline int sched_info_on(void) 760 static inline int sched_info_on(void)
761 { 761 {
762 #ifdef CONFIG_SCHEDSTATS 762 #ifdef CONFIG_SCHEDSTATS
763 return 1; 763 return 1;
764 #elif defined(CONFIG_TASK_DELAY_ACCT) 764 #elif defined(CONFIG_TASK_DELAY_ACCT)
765 extern int delayacct_on; 765 extern int delayacct_on;
766 return delayacct_on; 766 return delayacct_on;
767 #else 767 #else
768 return 0; 768 return 0;
769 #endif 769 #endif
770 } 770 }
771 771
772 enum cpu_idle_type { 772 enum cpu_idle_type {
773 CPU_IDLE, 773 CPU_IDLE,
774 CPU_NOT_IDLE, 774 CPU_NOT_IDLE,
775 CPU_NEWLY_IDLE, 775 CPU_NEWLY_IDLE,
776 CPU_MAX_IDLE_TYPES 776 CPU_MAX_IDLE_TYPES
777 }; 777 };
778 778
779 /* 779 /*
780 * Increase resolution of nice-level calculations for 64-bit architectures. 780 * Increase resolution of nice-level calculations for 64-bit architectures.
781 * The extra resolution improves shares distribution and load balancing of 781 * The extra resolution improves shares distribution and load balancing of
782 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup 782 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
783 * hierarchies, especially on larger systems. This is not a user-visible change 783 * hierarchies, especially on larger systems. This is not a user-visible change
784 * and does not change the user-interface for setting shares/weights. 784 * and does not change the user-interface for setting shares/weights.
785 * 785 *
786 * We increase resolution only if we have enough bits to allow this increased 786 * We increase resolution only if we have enough bits to allow this increased
787 * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution 787 * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
788 * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the 788 * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
789 * increased costs. 789 * increased costs.
790 */ 790 */
791 #if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ 791 #if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */
792 # define SCHED_LOAD_RESOLUTION 10 792 # define SCHED_LOAD_RESOLUTION 10
793 # define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) 793 # define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION)
794 # define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) 794 # define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION)
795 #else 795 #else
796 # define SCHED_LOAD_RESOLUTION 0 796 # define SCHED_LOAD_RESOLUTION 0
797 # define scale_load(w) (w) 797 # define scale_load(w) (w)
798 # define scale_load_down(w) (w) 798 # define scale_load_down(w) (w)
799 #endif 799 #endif
800 800
801 #define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) 801 #define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION)
802 #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) 802 #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT)
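A worked example for the load-scaling macros (assuming the default weight table, where a nice-0 task weighs 1024; with SCHED_LOAD_RESOLUTION currently forced to 0, both macros are the identity):

	scale_load(1024)	== 1024 << SCHED_LOAD_RESOLUTION	/* 1024 today, 1048576 if the resolution were 10 */
	scale_load_down(w)	== w >> SCHED_LOAD_RESOLUTION
	SCHED_LOAD_SCALE	== 1L << (10 + SCHED_LOAD_RESOLUTION)	/* 1024 today */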
803 803
804 /* 804 /*
805 * Increase resolution of cpu_power calculations 805 * Increase resolution of cpu_power calculations
806 */ 806 */
807 #define SCHED_POWER_SHIFT 10 807 #define SCHED_POWER_SHIFT 10
808 #define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) 808 #define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT)
809 809
810 /* 810 /*
811 * sched-domains (multiprocessor balancing) declarations: 811 * sched-domains (multiprocessor balancing) declarations:
812 */ 812 */
813 #ifdef CONFIG_SMP 813 #ifdef CONFIG_SMP
814 #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ 814 #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
815 #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ 815 #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
816 #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ 816 #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
817 #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ 817 #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
818 #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ 818 #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
819 #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ 819 #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
820 #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ 820 #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
821 #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ 821 #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
822 #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ 822 #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
823 #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ 823 #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
824 #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ 824 #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
825 #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ 825 #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
826 826
827 extern int __weak arch_sd_sibiling_asym_packing(void); 827 extern int __weak arch_sd_sibiling_asym_packing(void);
828 828
829 struct sched_group_power { 829 struct sched_group_power {
830 atomic_t ref; 830 atomic_t ref;
831 /* 831 /*
832 * CPU power of this group, SCHED_LOAD_SCALE being max power for a 832 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
833 * single CPU. 833 * single CPU.
834 */ 834 */
835 unsigned int power, power_orig; 835 unsigned int power, power_orig;
836 unsigned long next_update; 836 unsigned long next_update;
837 /* 837 /*
838 * Number of busy cpus in this group. 838 * Number of busy cpus in this group.
839 */ 839 */
840 atomic_t nr_busy_cpus; 840 atomic_t nr_busy_cpus;
841 841
842 unsigned long cpumask[0]; /* iteration mask */ 842 unsigned long cpumask[0]; /* iteration mask */
843 }; 843 };
844 844
845 struct sched_group { 845 struct sched_group {
846 struct sched_group *next; /* Must be a circular list */ 846 struct sched_group *next; /* Must be a circular list */
847 atomic_t ref; 847 atomic_t ref;
848 848
849 unsigned int group_weight; 849 unsigned int group_weight;
850 struct sched_group_power *sgp; 850 struct sched_group_power *sgp;
851 851
852 /* 852 /*
853 * The CPUs this group covers. 853 * The CPUs this group covers.
854 * 854 *
855 * NOTE: this field is variable length. (Allocated dynamically 855 * NOTE: this field is variable length. (Allocated dynamically
856 * by attaching extra space to the end of the structure, 856 * by attaching extra space to the end of the structure,
857 * depending on how many CPUs the kernel has booted up with) 857 * depending on how many CPUs the kernel has booted up with)
858 */ 858 */
859 unsigned long cpumask[0]; 859 unsigned long cpumask[0];
860 }; 860 };
861 861
862 static inline struct cpumask *sched_group_cpus(struct sched_group *sg) 862 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
863 { 863 {
864 return to_cpumask(sg->cpumask); 864 return to_cpumask(sg->cpumask);
865 } 865 }
866 866
867 /* 867 /*
868 * cpumask masking which cpus in the group are allowed to iterate up the domain 868 * cpumask masking which cpus in the group are allowed to iterate up the domain
869 * tree. 869 * tree.
870 */ 870 */
871 static inline struct cpumask *sched_group_mask(struct sched_group *sg) 871 static inline struct cpumask *sched_group_mask(struct sched_group *sg)
872 { 872 {
873 return to_cpumask(sg->sgp->cpumask); 873 return to_cpumask(sg->sgp->cpumask);
874 } 874 }
875 875
876 /** 876 /**
877 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. 877 * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
878 * @group: The group whose first cpu is to be returned. 878 * @group: The group whose first cpu is to be returned.
879 */ 879 */
880 static inline unsigned int group_first_cpu(struct sched_group *group) 880 static inline unsigned int group_first_cpu(struct sched_group *group)
881 { 881 {
882 return cpumask_first(sched_group_cpus(group)); 882 return cpumask_first(sched_group_cpus(group));
883 } 883 }
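Because ->next forms a circular list (see struct sched_group above), a walk over a domain's groups typically looks like this minimal sketch (kernel context assumed; 'sd' is some struct sched_domain * already in hand):

	struct sched_group *sg = sd->groups;

	do {
		int cpu = group_first_cpu(sg);	/* first CPU covered by this group */

		pr_debug("group starting at cpu %d, weight %u\n", cpu, sg->group_weight);
		sg = sg->next;
	} while (sg != sd->groups);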
884 884
885 struct sched_domain_attr { 885 struct sched_domain_attr {
886 int relax_domain_level; 886 int relax_domain_level;
887 }; 887 };
888 888
889 #define SD_ATTR_INIT (struct sched_domain_attr) { \ 889 #define SD_ATTR_INIT (struct sched_domain_attr) { \
890 .relax_domain_level = -1, \ 890 .relax_domain_level = -1, \
891 } 891 }
892 892
893 extern int sched_domain_level_max; 893 extern int sched_domain_level_max;
894 894
895 struct sched_domain { 895 struct sched_domain {
896 /* These fields must be setup */ 896 /* These fields must be setup */
897 struct sched_domain *parent; /* top domain must be null terminated */ 897 struct sched_domain *parent; /* top domain must be null terminated */
898 struct sched_domain *child; /* bottom domain must be null terminated */ 898 struct sched_domain *child; /* bottom domain must be null terminated */
899 struct sched_group *groups; /* the balancing groups of the domain */ 899 struct sched_group *groups; /* the balancing groups of the domain */
900 unsigned long min_interval; /* Minimum balance interval ms */ 900 unsigned long min_interval; /* Minimum balance interval ms */
901 unsigned long max_interval; /* Maximum balance interval ms */ 901 unsigned long max_interval; /* Maximum balance interval ms */
902 unsigned int busy_factor; /* less balancing by factor if busy */ 902 unsigned int busy_factor; /* less balancing by factor if busy */
903 unsigned int imbalance_pct; /* No balance until over watermark */ 903 unsigned int imbalance_pct; /* No balance until over watermark */
904 unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ 904 unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
905 unsigned int busy_idx; 905 unsigned int busy_idx;
906 unsigned int idle_idx; 906 unsigned int idle_idx;
907 unsigned int newidle_idx; 907 unsigned int newidle_idx;
908 unsigned int wake_idx; 908 unsigned int wake_idx;
909 unsigned int forkexec_idx; 909 unsigned int forkexec_idx;
910 unsigned int smt_gain; 910 unsigned int smt_gain;
911 int flags; /* See SD_* */ 911 int flags; /* See SD_* */
912 int level; 912 int level;
913 913
914 /* Runtime fields. */ 914 /* Runtime fields. */
915 unsigned long last_balance; /* init to jiffies. units in jiffies */ 915 unsigned long last_balance; /* init to jiffies. units in jiffies */
916 unsigned int balance_interval; /* initialise to 1. units in ms. */ 916 unsigned int balance_interval; /* initialise to 1. units in ms. */
917 unsigned int nr_balance_failed; /* initialise to 0 */ 917 unsigned int nr_balance_failed; /* initialise to 0 */
918 918
919 u64 last_update; 919 u64 last_update;
920 920
921 #ifdef CONFIG_SCHEDSTATS 921 #ifdef CONFIG_SCHEDSTATS
922 /* load_balance() stats */ 922 /* load_balance() stats */
923 unsigned int lb_count[CPU_MAX_IDLE_TYPES]; 923 unsigned int lb_count[CPU_MAX_IDLE_TYPES];
924 unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; 924 unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
925 unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; 925 unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
926 unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; 926 unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
927 unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; 927 unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
928 unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; 928 unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
929 unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; 929 unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
930 unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; 930 unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
931 931
932 /* Active load balancing */ 932 /* Active load balancing */
933 unsigned int alb_count; 933 unsigned int alb_count;
934 unsigned int alb_failed; 934 unsigned int alb_failed;
935 unsigned int alb_pushed; 935 unsigned int alb_pushed;
936 936
937 /* SD_BALANCE_EXEC stats */ 937 /* SD_BALANCE_EXEC stats */
938 unsigned int sbe_count; 938 unsigned int sbe_count;
939 unsigned int sbe_balanced; 939 unsigned int sbe_balanced;
940 unsigned int sbe_pushed; 940 unsigned int sbe_pushed;
941 941
942 /* SD_BALANCE_FORK stats */ 942 /* SD_BALANCE_FORK stats */
943 unsigned int sbf_count; 943 unsigned int sbf_count;
944 unsigned int sbf_balanced; 944 unsigned int sbf_balanced;
945 unsigned int sbf_pushed; 945 unsigned int sbf_pushed;
946 946
947 /* try_to_wake_up() stats */ 947 /* try_to_wake_up() stats */
948 unsigned int ttwu_wake_remote; 948 unsigned int ttwu_wake_remote;
949 unsigned int ttwu_move_affine; 949 unsigned int ttwu_move_affine;
950 unsigned int ttwu_move_balance; 950 unsigned int ttwu_move_balance;
951 #endif 951 #endif
952 #ifdef CONFIG_SCHED_DEBUG 952 #ifdef CONFIG_SCHED_DEBUG
953 char *name; 953 char *name;
954 #endif 954 #endif
955 union { 955 union {
956 void *private; /* used during construction */ 956 void *private; /* used during construction */
957 struct rcu_head rcu; /* used during destruction */ 957 struct rcu_head rcu; /* used during destruction */
958 }; 958 };
959 959
960 unsigned int span_weight; 960 unsigned int span_weight;
961 /* 961 /*
962 * Span of all CPUs in this domain. 962 * Span of all CPUs in this domain.
963 * 963 *
964 * NOTE: this field is variable length. (Allocated dynamically 964 * NOTE: this field is variable length. (Allocated dynamically
965 * by attaching extra space to the end of the structure, 965 * by attaching extra space to the end of the structure,
966 * depending on how many CPUs the kernel has booted up with) 966 * depending on how many CPUs the kernel has booted up with)
967 */ 967 */
968 unsigned long span[0]; 968 unsigned long span[0];
969 }; 969 };
970 970
971 static inline struct cpumask *sched_domain_span(struct sched_domain *sd) 971 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
972 { 972 {
973 return to_cpumask(sd->span); 973 return to_cpumask(sd->span);
974 } 974 }
975 975
976 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], 976 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
977 struct sched_domain_attr *dattr_new); 977 struct sched_domain_attr *dattr_new);
978 978
979 /* Allocate an array of sched domains, for partition_sched_domains(). */ 979 /* Allocate an array of sched domains, for partition_sched_domains(). */
980 cpumask_var_t *alloc_sched_domains(unsigned int ndoms); 980 cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
981 void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); 981 void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
982 982
983 /* Test a flag in parent sched domain */ 983 /* Test a flag in parent sched domain */
984 static inline int test_sd_parent(struct sched_domain *sd, int flag) 984 static inline int test_sd_parent(struct sched_domain *sd, int flag)
985 { 985 {
986 if (sd->parent && (sd->parent->flags & flag)) 986 if (sd->parent && (sd->parent->flags & flag))
987 return 1; 987 return 1;
988 988
989 return 0; 989 return 0;
990 } 990 }
991 991
992 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); 992 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
993 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); 993 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
994 994
995 bool cpus_share_cache(int this_cpu, int that_cpu); 995 bool cpus_share_cache(int this_cpu, int that_cpu);
996 996
997 #else /* CONFIG_SMP */ 997 #else /* CONFIG_SMP */
998 998
999 struct sched_domain_attr; 999 struct sched_domain_attr;
1000 1000
1001 static inline void 1001 static inline void
1002 partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], 1002 partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
1003 struct sched_domain_attr *dattr_new) 1003 struct sched_domain_attr *dattr_new)
1004 { 1004 {
1005 } 1005 }
1006 1006
1007 static inline bool cpus_share_cache(int this_cpu, int that_cpu) 1007 static inline bool cpus_share_cache(int this_cpu, int that_cpu)
1008 { 1008 {
1009 return true; 1009 return true;
1010 } 1010 }
1011 1011
1012 #endif /* !CONFIG_SMP */ 1012 #endif /* !CONFIG_SMP */
1013 1013
1014 1014
1015 struct io_context; /* See blkdev.h */ 1015 struct io_context; /* See blkdev.h */
1016 1016
1017 1017
1018 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK 1018 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
1019 extern void prefetch_stack(struct task_struct *t); 1019 extern void prefetch_stack(struct task_struct *t);
1020 #else 1020 #else
1021 static inline void prefetch_stack(struct task_struct *t) { } 1021 static inline void prefetch_stack(struct task_struct *t) { }
1022 #endif 1022 #endif
1023 1023
1024 struct audit_context; /* See audit.c */ 1024 struct audit_context; /* See audit.c */
1025 struct mempolicy; 1025 struct mempolicy;
1026 struct pipe_inode_info; 1026 struct pipe_inode_info;
1027 struct uts_namespace; 1027 struct uts_namespace;
1028 1028
1029 struct rq; 1029 struct rq;
1030 struct sched_domain; 1030 struct sched_domain;
1031 1031
1032 /* 1032 /*
1033 * wake flags 1033 * wake flags
1034 */ 1034 */
1035 #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ 1035 #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
1036 #define WF_FORK 0x02 /* child wakeup after fork */ 1036 #define WF_FORK 0x02 /* child wakeup after fork */
1037 #define WF_MIGRATED 0x04 /* internal use, task got migrated */ 1037 #define WF_MIGRATED 0x04 /* internal use, task got migrated */
1038 1038
1039 #define ENQUEUE_WAKEUP 1 1039 #define ENQUEUE_WAKEUP 1
1040 #define ENQUEUE_HEAD 2 1040 #define ENQUEUE_HEAD 2
1041 #ifdef CONFIG_SMP 1041 #ifdef CONFIG_SMP
1042 #define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ 1042 #define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */
1043 #else 1043 #else
1044 #define ENQUEUE_WAKING 0 1044 #define ENQUEUE_WAKING 0
1045 #endif 1045 #endif
1046 1046
1047 #define DEQUEUE_SLEEP 1 1047 #define DEQUEUE_SLEEP 1
1048 1048
1049 struct sched_class { 1049 struct sched_class {
1050 const struct sched_class *next; 1050 const struct sched_class *next;
1051 1051
1052 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); 1052 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1053 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); 1053 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
1054 void (*yield_task) (struct rq *rq); 1054 void (*yield_task) (struct rq *rq);
1055 bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); 1055 bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
1056 1056
1057 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); 1057 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
1058 1058
1059 struct task_struct * (*pick_next_task) (struct rq *rq); 1059 struct task_struct * (*pick_next_task) (struct rq *rq);
1060 void (*put_prev_task) (struct rq *rq, struct task_struct *p); 1060 void (*put_prev_task) (struct rq *rq, struct task_struct *p);
1061 1061
1062 #ifdef CONFIG_SMP 1062 #ifdef CONFIG_SMP
1063 int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); 1063 int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
1064 void (*migrate_task_rq)(struct task_struct *p, int next_cpu); 1064 void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
1065 1065
1066 void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); 1066 void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
1067 void (*post_schedule) (struct rq *this_rq); 1067 void (*post_schedule) (struct rq *this_rq);
1068 void (*task_waking) (struct task_struct *task); 1068 void (*task_waking) (struct task_struct *task);
1069 void (*task_woken) (struct rq *this_rq, struct task_struct *task); 1069 void (*task_woken) (struct rq *this_rq, struct task_struct *task);
1070 1070
1071 void (*set_cpus_allowed)(struct task_struct *p, 1071 void (*set_cpus_allowed)(struct task_struct *p,
1072 const struct cpumask *newmask); 1072 const struct cpumask *newmask);
1073 1073
1074 void (*rq_online)(struct rq *rq); 1074 void (*rq_online)(struct rq *rq);
1075 void (*rq_offline)(struct rq *rq); 1075 void (*rq_offline)(struct rq *rq);
1076 #endif 1076 #endif
1077 1077
1078 void (*set_curr_task) (struct rq *rq); 1078 void (*set_curr_task) (struct rq *rq);
1079 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); 1079 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
1080 void (*task_fork) (struct task_struct *p); 1080 void (*task_fork) (struct task_struct *p);
1081 1081
1082 void (*switched_from) (struct rq *this_rq, struct task_struct *task); 1082 void (*switched_from) (struct rq *this_rq, struct task_struct *task);
1083 void (*switched_to) (struct rq *this_rq, struct task_struct *task); 1083 void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1084 void (*prio_changed) (struct rq *this_rq, struct task_struct *task, 1084 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1085 int oldprio); 1085 int oldprio);
1086 1086
1087 unsigned int (*get_rr_interval) (struct rq *rq, 1087 unsigned int (*get_rr_interval) (struct rq *rq,
1088 struct task_struct *task); 1088 struct task_struct *task);
1089 1089
1090 #ifdef CONFIG_FAIR_GROUP_SCHED 1090 #ifdef CONFIG_FAIR_GROUP_SCHED
1091 void (*task_move_group) (struct task_struct *p, int on_rq); 1091 void (*task_move_group) (struct task_struct *p, int on_rq);
1092 #endif 1092 #endif
1093 }; 1093 };
1094 1094
1095 struct load_weight { 1095 struct load_weight {
1096 unsigned long weight, inv_weight; 1096 unsigned long weight, inv_weight;
1097 }; 1097 };
1098 1098
1099 struct sched_avg { 1099 struct sched_avg {
1100 /* 1100 /*
1101 * These sums represent an infinite geometric series and so are bound 1101 * These sums represent an infinite geometric series and so are bound
1102 * above by 1024/(1-y). Thus we only need a u32 to store them for all 1102 * above by 1024/(1-y). Thus we only need a u32 to store them for all
1103 * choices of y < 1-2^(-32)*1024. 1103 * choices of y < 1-2^(-32)*1024.
1104 */ 1104 */
1105 u32 runnable_avg_sum, runnable_avg_period; 1105 u32 runnable_avg_sum, runnable_avg_period;
1106 u64 last_runnable_update; 1106 u64 last_runnable_update;
1107 s64 decay_count; 1107 s64 decay_count;
1108 unsigned long load_avg_contrib; 1108 unsigned long load_avg_contrib;
1109 }; 1109 };
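A quick standalone sketch (not part of this header) of why a u32 suffices: each ~1ms period contributes at most 1024, so the infinite series is bounded by 1024/(1-y). Assuming the decay used by the load-tracking code, where y^32 = 1/2, the bound works out to roughly 47742.

#include <math.h>
#include <stdio.h>

int main(void)
{
	double y = pow(0.5, 1.0 / 32.0);	/* assumed decay: y^32 == 1/2 */
	double bound = 1024.0 / (1.0 - y);	/* limit of the geometric series */

	printf("y = %.6f, 1024/(1-y) ~= %.0f (fits easily in a u32)\n", y, bound);
	return 0;
}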
1110 1110
1111 #ifdef CONFIG_SCHEDSTATS 1111 #ifdef CONFIG_SCHEDSTATS
1112 struct sched_statistics { 1112 struct sched_statistics {
1113 u64 wait_start; 1113 u64 wait_start;
1114 u64 wait_max; 1114 u64 wait_max;
1115 u64 wait_count; 1115 u64 wait_count;
1116 u64 wait_sum; 1116 u64 wait_sum;
1117 u64 iowait_count; 1117 u64 iowait_count;
1118 u64 iowait_sum; 1118 u64 iowait_sum;
1119 1119
1120 u64 sleep_start; 1120 u64 sleep_start;
1121 u64 sleep_max; 1121 u64 sleep_max;
1122 s64 sum_sleep_runtime; 1122 s64 sum_sleep_runtime;
1123 1123
1124 u64 block_start; 1124 u64 block_start;
1125 u64 block_max; 1125 u64 block_max;
1126 u64 exec_max; 1126 u64 exec_max;
1127 u64 slice_max; 1127 u64 slice_max;
1128 1128
1129 u64 nr_migrations_cold; 1129 u64 nr_migrations_cold;
1130 u64 nr_failed_migrations_affine; 1130 u64 nr_failed_migrations_affine;
1131 u64 nr_failed_migrations_running; 1131 u64 nr_failed_migrations_running;
1132 u64 nr_failed_migrations_hot; 1132 u64 nr_failed_migrations_hot;
1133 u64 nr_forced_migrations; 1133 u64 nr_forced_migrations;
1134 1134
1135 u64 nr_wakeups; 1135 u64 nr_wakeups;
1136 u64 nr_wakeups_sync; 1136 u64 nr_wakeups_sync;
1137 u64 nr_wakeups_migrate; 1137 u64 nr_wakeups_migrate;
1138 u64 nr_wakeups_local; 1138 u64 nr_wakeups_local;
1139 u64 nr_wakeups_remote; 1139 u64 nr_wakeups_remote;
1140 u64 nr_wakeups_affine; 1140 u64 nr_wakeups_affine;
1141 u64 nr_wakeups_affine_attempts; 1141 u64 nr_wakeups_affine_attempts;
1142 u64 nr_wakeups_passive; 1142 u64 nr_wakeups_passive;
1143 u64 nr_wakeups_idle; 1143 u64 nr_wakeups_idle;
1144 }; 1144 };
1145 #endif 1145 #endif
1146 1146
1147 struct sched_entity { 1147 struct sched_entity {
1148 struct load_weight load; /* for load-balancing */ 1148 struct load_weight load; /* for load-balancing */
1149 struct rb_node run_node; 1149 struct rb_node run_node;
1150 struct list_head group_node; 1150 struct list_head group_node;
1151 unsigned int on_rq; 1151 unsigned int on_rq;
1152 1152
1153 u64 exec_start; 1153 u64 exec_start;
1154 u64 sum_exec_runtime; 1154 u64 sum_exec_runtime;
1155 u64 vruntime; 1155 u64 vruntime;
1156 u64 prev_sum_exec_runtime; 1156 u64 prev_sum_exec_runtime;
1157 1157
1158 u64 nr_migrations; 1158 u64 nr_migrations;
1159 1159
1160 #ifdef CONFIG_SCHEDSTATS 1160 #ifdef CONFIG_SCHEDSTATS
1161 struct sched_statistics statistics; 1161 struct sched_statistics statistics;
1162 #endif 1162 #endif
1163 1163
1164 #ifdef CONFIG_FAIR_GROUP_SCHED 1164 #ifdef CONFIG_FAIR_GROUP_SCHED
1165 struct sched_entity *parent; 1165 struct sched_entity *parent;
1166 /* rq on which this entity is (to be) queued: */ 1166 /* rq on which this entity is (to be) queued: */
1167 struct cfs_rq *cfs_rq; 1167 struct cfs_rq *cfs_rq;
1168 /* rq "owned" by this entity/group: */ 1168 /* rq "owned" by this entity/group: */
1169 struct cfs_rq *my_q; 1169 struct cfs_rq *my_q;
1170 #endif 1170 #endif
1171 /* 1171 /*
1172 * Load-tracking only depends on SMP; the FAIR_GROUP_SCHED dependency below may be 1172 * Load-tracking only depends on SMP; the FAIR_GROUP_SCHED dependency below may be
1173 * removed when it becomes useful for applications beyond shares distribution (e.g. 1173 * removed when it becomes useful for applications beyond shares distribution (e.g.
1174 * load-balance). 1174 * load-balance).
1175 */ 1175 */
1176 #if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) 1176 #if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
1177 /* Per-entity load-tracking */ 1177 /* Per-entity load-tracking */
1178 struct sched_avg avg; 1178 struct sched_avg avg;
1179 #endif 1179 #endif
1180 }; 1180 };
1181 1181
1182 struct sched_rt_entity { 1182 struct sched_rt_entity {
1183 struct list_head run_list; 1183 struct list_head run_list;
1184 unsigned long timeout; 1184 unsigned long timeout;
1185 unsigned int time_slice; 1185 unsigned int time_slice;
1186 1186
1187 struct sched_rt_entity *back; 1187 struct sched_rt_entity *back;
1188 #ifdef CONFIG_RT_GROUP_SCHED 1188 #ifdef CONFIG_RT_GROUP_SCHED
1189 struct sched_rt_entity *parent; 1189 struct sched_rt_entity *parent;
1190 /* rq on which this entity is (to be) queued: */ 1190 /* rq on which this entity is (to be) queued: */
1191 struct rt_rq *rt_rq; 1191 struct rt_rq *rt_rq;
1192 /* rq "owned" by this entity/group: */ 1192 /* rq "owned" by this entity/group: */
1193 struct rt_rq *my_q; 1193 struct rt_rq *my_q;
1194 #endif 1194 #endif
1195 }; 1195 };
1196 1196
1197 /* 1197 /*
1198 * default timeslice is 100 msecs (used only for SCHED_RR tasks). 1198 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
1199 * Timeslices get refilled after they expire. 1199 * Timeslices get refilled after they expire.
1200 */ 1200 */
1201 #define RR_TIMESLICE (100 * HZ / 1000) 1201 #define RR_TIMESLICE (100 * HZ / 1000)
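The macro above just converts 100 ms into jiffies; a small userspace sketch of the same arithmetic for a few common HZ values (the actual result depends on the kernel's CONFIG_HZ):

#include <stdio.h>

static unsigned long rr_timeslice(unsigned long hz)
{
	return 100 * hz / 1000;		/* same formula as RR_TIMESLICE */
}

int main(void)
{
	printf("HZ=100  -> %3lu jiffies\n", rr_timeslice(100));		/* 10  */
	printf("HZ=250  -> %3lu jiffies\n", rr_timeslice(250));		/* 25  */
	printf("HZ=1000 -> %3lu jiffies\n", rr_timeslice(1000));	/* 100 */
	return 0;
}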
1202 1202
1203 struct rcu_node; 1203 struct rcu_node;
1204 1204
1205 enum perf_event_task_context { 1205 enum perf_event_task_context {
1206 perf_invalid_context = -1, 1206 perf_invalid_context = -1,
1207 perf_hw_context = 0, 1207 perf_hw_context = 0,
1208 perf_sw_context, 1208 perf_sw_context,
1209 perf_nr_task_contexts, 1209 perf_nr_task_contexts,
1210 }; 1210 };
1211 1211
1212 struct task_struct { 1212 struct task_struct {
1213 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1213 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
1214 void *stack; 1214 void *stack;
1215 atomic_t usage; 1215 atomic_t usage;
1216 unsigned int flags; /* per process flags, defined below */ 1216 unsigned int flags; /* per process flags, defined below */
1217 unsigned int ptrace; 1217 unsigned int ptrace;
1218 1218
1219 #ifdef CONFIG_SMP 1219 #ifdef CONFIG_SMP
1220 struct llist_node wake_entry; 1220 struct llist_node wake_entry;
1221 int on_cpu; 1221 int on_cpu;
1222 #endif 1222 #endif
1223 int on_rq; 1223 int on_rq;
1224 1224
1225 int prio, static_prio, normal_prio; 1225 int prio, static_prio, normal_prio;
1226 unsigned int rt_priority; 1226 unsigned int rt_priority;
1227 const struct sched_class *sched_class; 1227 const struct sched_class *sched_class;
1228 struct sched_entity se; 1228 struct sched_entity se;
1229 struct sched_rt_entity rt; 1229 struct sched_rt_entity rt;
1230 #ifdef CONFIG_CGROUP_SCHED 1230 #ifdef CONFIG_CGROUP_SCHED
1231 struct task_group *sched_task_group; 1231 struct task_group *sched_task_group;
1232 #endif 1232 #endif
1233 1233
1234 #ifdef CONFIG_PREEMPT_NOTIFIERS 1234 #ifdef CONFIG_PREEMPT_NOTIFIERS
1235 /* list of struct preempt_notifier: */ 1235 /* list of struct preempt_notifier: */
1236 struct hlist_head preempt_notifiers; 1236 struct hlist_head preempt_notifiers;
1237 #endif 1237 #endif
1238 1238
1239 /* 1239 /*
1240 * fpu_counter contains the number of consecutive context switches 1240 * fpu_counter contains the number of consecutive context switches
1241 * during which the FPU is used. If this is over a threshold, the lazy fpu 1241 * during which the FPU is used. If this is over a threshold, the lazy fpu
1242 * saving becomes unlazy to save the trap. This is an unsigned char 1242 * saving becomes unlazy to save the trap. This is an unsigned char
1243 * so that after 256 times the counter wraps and the behavior turns 1243 * so that after 256 times the counter wraps and the behavior turns
1244 * lazy again; this is to deal with bursty apps that only use the FPU for 1244 * lazy again; this is to deal with bursty apps that only use the FPU for
1245 * a short time. 1245 * a short time.
1246 */ 1246 */
1247 unsigned char fpu_counter; 1247 unsigned char fpu_counter;
1248 #ifdef CONFIG_BLK_DEV_IO_TRACE 1248 #ifdef CONFIG_BLK_DEV_IO_TRACE
1249 unsigned int btrace_seq; 1249 unsigned int btrace_seq;
1250 #endif 1250 #endif
1251 1251
1252 unsigned int policy; 1252 unsigned int policy;
1253 int nr_cpus_allowed; 1253 int nr_cpus_allowed;
1254 cpumask_t cpus_allowed; 1254 cpumask_t cpus_allowed;
1255 1255
1256 #ifdef CONFIG_PREEMPT_RCU 1256 #ifdef CONFIG_PREEMPT_RCU
1257 int rcu_read_lock_nesting; 1257 int rcu_read_lock_nesting;
1258 char rcu_read_unlock_special; 1258 char rcu_read_unlock_special;
1259 struct list_head rcu_node_entry; 1259 struct list_head rcu_node_entry;
1260 #endif /* #ifdef CONFIG_PREEMPT_RCU */ 1260 #endif /* #ifdef CONFIG_PREEMPT_RCU */
1261 #ifdef CONFIG_TREE_PREEMPT_RCU 1261 #ifdef CONFIG_TREE_PREEMPT_RCU
1262 struct rcu_node *rcu_blocked_node; 1262 struct rcu_node *rcu_blocked_node;
1263 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1263 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1264 #ifdef CONFIG_RCU_BOOST 1264 #ifdef CONFIG_RCU_BOOST
1265 struct rt_mutex *rcu_boost_mutex; 1265 struct rt_mutex *rcu_boost_mutex;
1266 #endif /* #ifdef CONFIG_RCU_BOOST */ 1266 #endif /* #ifdef CONFIG_RCU_BOOST */
1267 1267
1268 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1268 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1269 struct sched_info sched_info; 1269 struct sched_info sched_info;
1270 #endif 1270 #endif
1271 1271
1272 struct list_head tasks; 1272 struct list_head tasks;
1273 #ifdef CONFIG_SMP 1273 #ifdef CONFIG_SMP
1274 struct plist_node pushable_tasks; 1274 struct plist_node pushable_tasks;
1275 #endif 1275 #endif
1276 1276
1277 struct mm_struct *mm, *active_mm; 1277 struct mm_struct *mm, *active_mm;
1278 #ifdef CONFIG_COMPAT_BRK 1278 #ifdef CONFIG_COMPAT_BRK
1279 unsigned brk_randomized:1; 1279 unsigned brk_randomized:1;
1280 #endif 1280 #endif
1281 #if defined(SPLIT_RSS_COUNTING) 1281 #if defined(SPLIT_RSS_COUNTING)
1282 struct task_rss_stat rss_stat; 1282 struct task_rss_stat rss_stat;
1283 #endif 1283 #endif
1284 /* task state */ 1284 /* task state */
1285 int exit_state; 1285 int exit_state;
1286 int exit_code, exit_signal; 1286 int exit_code, exit_signal;
1287 int pdeath_signal; /* The signal sent when the parent dies */ 1287 int pdeath_signal; /* The signal sent when the parent dies */
1288 unsigned int jobctl; /* JOBCTL_*, siglock protected */ 1288 unsigned int jobctl; /* JOBCTL_*, siglock protected */
1289 /* ??? */ 1289 /* ??? */
1290 unsigned int personality; 1290 unsigned int personality;
1291 unsigned did_exec:1; 1291 unsigned did_exec:1;
1292 unsigned in_execve:1; /* Tell the LSMs that the process is doing an 1292 unsigned in_execve:1; /* Tell the LSMs that the process is doing an
1293 * execve */ 1293 * execve */
1294 unsigned in_iowait:1; 1294 unsigned in_iowait:1;
1295 1295
1296 /* task may not gain privileges */ 1296 /* task may not gain privileges */
1297 unsigned no_new_privs:1; 1297 unsigned no_new_privs:1;
1298 1298
1299 /* Revert to default priority/policy when forking */ 1299 /* Revert to default priority/policy when forking */
1300 unsigned sched_reset_on_fork:1; 1300 unsigned sched_reset_on_fork:1;
1301 unsigned sched_contributes_to_load:1; 1301 unsigned sched_contributes_to_load:1;
1302 1302
1303 pid_t pid; 1303 pid_t pid;
1304 pid_t tgid; 1304 pid_t tgid;
1305 1305
1306 #ifdef CONFIG_CC_STACKPROTECTOR 1306 #ifdef CONFIG_CC_STACKPROTECTOR
1307 /* Canary value for the -fstack-protector gcc feature */ 1307 /* Canary value for the -fstack-protector gcc feature */
1308 unsigned long stack_canary; 1308 unsigned long stack_canary;
1309 #endif 1309 #endif
1310 /* 1310 /*
1311 * pointers to (original) parent process, youngest child, younger sibling, 1311 * pointers to (original) parent process, youngest child, younger sibling,
1312 * older sibling, respectively. (p->father can be replaced with 1312 * older sibling, respectively. (p->father can be replaced with
1313 * p->real_parent->pid) 1313 * p->real_parent->pid)
1314 */ 1314 */
1315 struct task_struct __rcu *real_parent; /* real parent process */ 1315 struct task_struct __rcu *real_parent; /* real parent process */
1316 struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ 1316 struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
1317 /* 1317 /*
1318 * children/sibling forms the list of my natural children 1318 * children/sibling forms the list of my natural children
1319 */ 1319 */
1320 struct list_head children; /* list of my children */ 1320 struct list_head children; /* list of my children */
1321 struct list_head sibling; /* linkage in my parent's children list */ 1321 struct list_head sibling; /* linkage in my parent's children list */
1322 struct task_struct *group_leader; /* threadgroup leader */ 1322 struct task_struct *group_leader; /* threadgroup leader */
1323 1323
1324 /* 1324 /*
1325 * ptraced is the list of tasks this task is using ptrace on. 1325 * ptraced is the list of tasks this task is using ptrace on.
1326 * This includes both natural children and PTRACE_ATTACH targets. 1326 * This includes both natural children and PTRACE_ATTACH targets.
1327 * p->ptrace_entry is p's link on the p->parent->ptraced list. 1327 * p->ptrace_entry is p's link on the p->parent->ptraced list.
1328 */ 1328 */
1329 struct list_head ptraced; 1329 struct list_head ptraced;
1330 struct list_head ptrace_entry; 1330 struct list_head ptrace_entry;
1331 1331
1332 /* PID/PID hash table linkage. */ 1332 /* PID/PID hash table linkage. */
1333 struct pid_link pids[PIDTYPE_MAX]; 1333 struct pid_link pids[PIDTYPE_MAX];
1334 struct list_head thread_group; 1334 struct list_head thread_group;
1335 1335
1336 struct completion *vfork_done; /* for vfork() */ 1336 struct completion *vfork_done; /* for vfork() */
1337 int __user *set_child_tid; /* CLONE_CHILD_SETTID */ 1337 int __user *set_child_tid; /* CLONE_CHILD_SETTID */
1338 int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ 1338 int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
1339 1339
1340 cputime_t utime, stime, utimescaled, stimescaled; 1340 cputime_t utime, stime, utimescaled, stimescaled;
1341 cputime_t gtime; 1341 cputime_t gtime;
1342 #ifndef CONFIG_VIRT_CPU_ACCOUNTING 1342 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
1343 cputime_t prev_utime, prev_stime; 1343 cputime_t prev_utime, prev_stime;
1344 #endif 1344 #endif
1345 unsigned long nvcsw, nivcsw; /* context switch counts */ 1345 unsigned long nvcsw, nivcsw; /* context switch counts */
1346 struct timespec start_time; /* monotonic time */ 1346 struct timespec start_time; /* monotonic time */
1347 struct timespec real_start_time; /* boot based time */ 1347 struct timespec real_start_time; /* boot based time */
1348 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ 1348 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1349 unsigned long min_flt, maj_flt; 1349 unsigned long min_flt, maj_flt;
1350 1350
1351 struct task_cputime cputime_expires; 1351 struct task_cputime cputime_expires;
1352 struct list_head cpu_timers[3]; 1352 struct list_head cpu_timers[3];
1353 1353
1354 /* process credentials */ 1354 /* process credentials */
1355 const struct cred __rcu *real_cred; /* objective and real subjective task 1355 const struct cred __rcu *real_cred; /* objective and real subjective task
1356 * credentials (COW) */ 1356 * credentials (COW) */
1357 const struct cred __rcu *cred; /* effective (overridable) subjective task 1357 const struct cred __rcu *cred; /* effective (overridable) subjective task
1358 * credentials (COW) */ 1358 * credentials (COW) */
1359 char comm[TASK_COMM_LEN]; /* executable name excluding path 1359 char comm[TASK_COMM_LEN]; /* executable name excluding path
1360 - access with [gs]et_task_comm (which lock 1360 - access with [gs]et_task_comm (which lock
1361 it with task_lock()) 1361 it with task_lock())
1362 - initialized normally by setup_new_exec */ 1362 - initialized normally by setup_new_exec */
1363 /* file system info */ 1363 /* file system info */
1364 int link_count, total_link_count; 1364 int link_count, total_link_count;
1365 #ifdef CONFIG_SYSVIPC 1365 #ifdef CONFIG_SYSVIPC
1366 /* ipc stuff */ 1366 /* ipc stuff */
1367 struct sysv_sem sysvsem; 1367 struct sysv_sem sysvsem;
1368 #endif 1368 #endif
1369 #ifdef CONFIG_DETECT_HUNG_TASK 1369 #ifdef CONFIG_DETECT_HUNG_TASK
1370 /* hung task detection */ 1370 /* hung task detection */
1371 unsigned long last_switch_count; 1371 unsigned long last_switch_count;
1372 #endif 1372 #endif
1373 /* CPU-specific state of this task */ 1373 /* CPU-specific state of this task */
1374 struct thread_struct thread; 1374 struct thread_struct thread;
1375 /* filesystem information */ 1375 /* filesystem information */
1376 struct fs_struct *fs; 1376 struct fs_struct *fs;
1377 /* open file information */ 1377 /* open file information */
1378 struct files_struct *files; 1378 struct files_struct *files;
1379 /* namespaces */ 1379 /* namespaces */
1380 struct nsproxy *nsproxy; 1380 struct nsproxy *nsproxy;
1381 /* signal handlers */ 1381 /* signal handlers */
1382 struct signal_struct *signal; 1382 struct signal_struct *signal;
1383 struct sighand_struct *sighand; 1383 struct sighand_struct *sighand;
1384 1384
1385 sigset_t blocked, real_blocked; 1385 sigset_t blocked, real_blocked;
1386 sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ 1386 sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
1387 struct sigpending pending; 1387 struct sigpending pending;
1388 1388
1389 unsigned long sas_ss_sp; 1389 unsigned long sas_ss_sp;
1390 size_t sas_ss_size; 1390 size_t sas_ss_size;
1391 int (*notifier)(void *priv); 1391 int (*notifier)(void *priv);
1392 void *notifier_data; 1392 void *notifier_data;
1393 sigset_t *notifier_mask; 1393 sigset_t *notifier_mask;
1394 struct callback_head *task_works; 1394 struct callback_head *task_works;
1395 1395
1396 struct audit_context *audit_context; 1396 struct audit_context *audit_context;
1397 #ifdef CONFIG_AUDITSYSCALL 1397 #ifdef CONFIG_AUDITSYSCALL
1398 kuid_t loginuid; 1398 kuid_t loginuid;
1399 unsigned int sessionid; 1399 unsigned int sessionid;
1400 #endif 1400 #endif
1401 struct seccomp seccomp; 1401 struct seccomp seccomp;
1402 1402
1403 /* Thread group tracking */ 1403 /* Thread group tracking */
1404 u32 parent_exec_id; 1404 u32 parent_exec_id;
1405 u32 self_exec_id; 1405 u32 self_exec_id;
1406 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, 1406 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
1407 * mempolicy */ 1407 * mempolicy */
1408 spinlock_t alloc_lock; 1408 spinlock_t alloc_lock;
1409 1409
1410 /* Protection of the PI data structures: */ 1410 /* Protection of the PI data structures: */
1411 raw_spinlock_t pi_lock; 1411 raw_spinlock_t pi_lock;
1412 1412
1413 #ifdef CONFIG_RT_MUTEXES 1413 #ifdef CONFIG_RT_MUTEXES
1414 /* PI waiters blocked on a rt_mutex held by this task */ 1414 /* PI waiters blocked on a rt_mutex held by this task */
1415 struct plist_head pi_waiters; 1415 struct plist_head pi_waiters;
1416 /* Deadlock detection and priority inheritance handling */ 1416 /* Deadlock detection and priority inheritance handling */
1417 struct rt_mutex_waiter *pi_blocked_on; 1417 struct rt_mutex_waiter *pi_blocked_on;
1418 #endif 1418 #endif
1419 1419
1420 #ifdef CONFIG_DEBUG_MUTEXES 1420 #ifdef CONFIG_DEBUG_MUTEXES
1421 /* mutex deadlock detection */ 1421 /* mutex deadlock detection */
1422 struct mutex_waiter *blocked_on; 1422 struct mutex_waiter *blocked_on;
1423 #endif 1423 #endif
1424 #ifdef CONFIG_TRACE_IRQFLAGS 1424 #ifdef CONFIG_TRACE_IRQFLAGS
1425 unsigned int irq_events; 1425 unsigned int irq_events;
1426 unsigned long hardirq_enable_ip; 1426 unsigned long hardirq_enable_ip;
1427 unsigned long hardirq_disable_ip; 1427 unsigned long hardirq_disable_ip;
1428 unsigned int hardirq_enable_event; 1428 unsigned int hardirq_enable_event;
1429 unsigned int hardirq_disable_event; 1429 unsigned int hardirq_disable_event;
1430 int hardirqs_enabled; 1430 int hardirqs_enabled;
1431 int hardirq_context; 1431 int hardirq_context;
1432 unsigned long softirq_disable_ip; 1432 unsigned long softirq_disable_ip;
1433 unsigned long softirq_enable_ip; 1433 unsigned long softirq_enable_ip;
1434 unsigned int softirq_disable_event; 1434 unsigned int softirq_disable_event;
1435 unsigned int softirq_enable_event; 1435 unsigned int softirq_enable_event;
1436 int softirqs_enabled; 1436 int softirqs_enabled;
1437 int softirq_context; 1437 int softirq_context;
1438 #endif 1438 #endif
1439 #ifdef CONFIG_LOCKDEP 1439 #ifdef CONFIG_LOCKDEP
1440 # define MAX_LOCK_DEPTH 48UL 1440 # define MAX_LOCK_DEPTH 48UL
1441 u64 curr_chain_key; 1441 u64 curr_chain_key;
1442 int lockdep_depth; 1442 int lockdep_depth;
1443 unsigned int lockdep_recursion; 1443 unsigned int lockdep_recursion;
1444 struct held_lock held_locks[MAX_LOCK_DEPTH]; 1444 struct held_lock held_locks[MAX_LOCK_DEPTH];
1445 gfp_t lockdep_reclaim_gfp; 1445 gfp_t lockdep_reclaim_gfp;
1446 #endif 1446 #endif
1447 1447
1448 /* journalling filesystem info */ 1448 /* journalling filesystem info */
1449 void *journal_info; 1449 void *journal_info;
1450 1450
1451 /* stacked block device info */ 1451 /* stacked block device info */
1452 struct bio_list *bio_list; 1452 struct bio_list *bio_list;
1453 1453
1454 #ifdef CONFIG_BLOCK 1454 #ifdef CONFIG_BLOCK
1455 /* stack plugging */ 1455 /* stack plugging */
1456 struct blk_plug *plug; 1456 struct blk_plug *plug;
1457 #endif 1457 #endif
1458 1458
1459 /* VM state */ 1459 /* VM state */
1460 struct reclaim_state *reclaim_state; 1460 struct reclaim_state *reclaim_state;
1461 1461
1462 struct backing_dev_info *backing_dev_info; 1462 struct backing_dev_info *backing_dev_info;
1463 1463
1464 struct io_context *io_context; 1464 struct io_context *io_context;
1465 1465
1466 unsigned long ptrace_message; 1466 unsigned long ptrace_message;
1467 siginfo_t *last_siginfo; /* For ptrace use. */ 1467 siginfo_t *last_siginfo; /* For ptrace use. */
1468 struct task_io_accounting ioac; 1468 struct task_io_accounting ioac;
1469 #if defined(CONFIG_TASK_XACCT) 1469 #if defined(CONFIG_TASK_XACCT)
1470 u64 acct_rss_mem1; /* accumulated rss usage */ 1470 u64 acct_rss_mem1; /* accumulated rss usage */
1471 u64 acct_vm_mem1; /* accumulated virtual memory usage */ 1471 u64 acct_vm_mem1; /* accumulated virtual memory usage */
1472 cputime_t acct_timexpd; /* stime + utime since last update */ 1472 cputime_t acct_timexpd; /* stime + utime since last update */
1473 #endif 1473 #endif
1474 #ifdef CONFIG_CPUSETS 1474 #ifdef CONFIG_CPUSETS
1475 nodemask_t mems_allowed; /* Protected by alloc_lock */ 1475 nodemask_t mems_allowed; /* Protected by alloc_lock */
1476 seqcount_t mems_allowed_seq; /* Sequence number to catch updates */ 1476 seqcount_t mems_allowed_seq; /* Sequence number to catch updates */
1477 int cpuset_mem_spread_rotor; 1477 int cpuset_mem_spread_rotor;
1478 int cpuset_slab_spread_rotor; 1478 int cpuset_slab_spread_rotor;
1479 #endif 1479 #endif
1480 #ifdef CONFIG_CGROUPS 1480 #ifdef CONFIG_CGROUPS
1481 /* Control Group info protected by css_set_lock */ 1481 /* Control Group info protected by css_set_lock */
1482 struct css_set __rcu *cgroups; 1482 struct css_set __rcu *cgroups;
1483 /* cg_list protected by css_set_lock and tsk->alloc_lock */ 1483 /* cg_list protected by css_set_lock and tsk->alloc_lock */
1484 struct list_head cg_list; 1484 struct list_head cg_list;
1485 #endif 1485 #endif
1486 #ifdef CONFIG_FUTEX 1486 #ifdef CONFIG_FUTEX
1487 struct robust_list_head __user *robust_list; 1487 struct robust_list_head __user *robust_list;
1488 #ifdef CONFIG_COMPAT 1488 #ifdef CONFIG_COMPAT
1489 struct compat_robust_list_head __user *compat_robust_list; 1489 struct compat_robust_list_head __user *compat_robust_list;
1490 #endif 1490 #endif
1491 struct list_head pi_state_list; 1491 struct list_head pi_state_list;
1492 struct futex_pi_state *pi_state_cache; 1492 struct futex_pi_state *pi_state_cache;
1493 #endif 1493 #endif
1494 #ifdef CONFIG_PERF_EVENTS 1494 #ifdef CONFIG_PERF_EVENTS
1495 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; 1495 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1496 struct mutex perf_event_mutex; 1496 struct mutex perf_event_mutex;
1497 struct list_head perf_event_list; 1497 struct list_head perf_event_list;
1498 #endif 1498 #endif
1499 #ifdef CONFIG_NUMA 1499 #ifdef CONFIG_NUMA
1500 struct mempolicy *mempolicy; /* Protected by alloc_lock */ 1500 struct mempolicy *mempolicy; /* Protected by alloc_lock */
1501 short il_next; 1501 short il_next;
1502 short pref_node_fork; 1502 short pref_node_fork;
1503 #endif 1503 #endif
1504 struct rcu_head rcu; 1504 struct rcu_head rcu;
1505 1505
1506 /* 1506 /*
1507 * cache last used pipe for splice 1507 * cache last used pipe for splice
1508 */ 1508 */
1509 struct pipe_inode_info *splice_pipe; 1509 struct pipe_inode_info *splice_pipe;
1510 1510
1511 struct page_frag task_frag; 1511 struct page_frag task_frag;
1512 1512
1513 #ifdef CONFIG_TASK_DELAY_ACCT 1513 #ifdef CONFIG_TASK_DELAY_ACCT
1514 struct task_delay_info *delays; 1514 struct task_delay_info *delays;
1515 #endif 1515 #endif
1516 #ifdef CONFIG_FAULT_INJECTION 1516 #ifdef CONFIG_FAULT_INJECTION
1517 int make_it_fail; 1517 int make_it_fail;
1518 #endif 1518 #endif
1519 /* 1519 /*
1520 * when (nr_dirtied >= nr_dirtied_pause), it's time to call 1520 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
1521 * balance_dirty_pages() for some dirty throttling pause 1521 * balance_dirty_pages() for some dirty throttling pause
1522 */ 1522 */
1523 int nr_dirtied; 1523 int nr_dirtied;
1524 int nr_dirtied_pause; 1524 int nr_dirtied_pause;
1525 unsigned long dirty_paused_when; /* start of a write-and-pause period */ 1525 unsigned long dirty_paused_when; /* start of a write-and-pause period */
1526 1526
1527 #ifdef CONFIG_LATENCYTOP 1527 #ifdef CONFIG_LATENCYTOP
1528 int latency_record_count; 1528 int latency_record_count;
1529 struct latency_record latency_record[LT_SAVECOUNT]; 1529 struct latency_record latency_record[LT_SAVECOUNT];
1530 #endif 1530 #endif
1531 /* 1531 /*
1532 * time slack values; these are used to round up poll() and 1532 * time slack values; these are used to round up poll() and
1533 * select() etc timeout values. These are in nanoseconds. 1533 * select() etc timeout values. These are in nanoseconds.
1534 */ 1534 */
1535 unsigned long timer_slack_ns; 1535 unsigned long timer_slack_ns;
1536 unsigned long default_timer_slack_ns; 1536 unsigned long default_timer_slack_ns;
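These two fields back the PR_SET_TIMERSLACK/PR_GET_TIMERSLACK prctl()s; a hedged userspace sketch of adjusting them (assumes <sys/prctl.h> exposes the constants):

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* Widen the slack to 1 ms so poll()/select() timeouts can be coalesced. */
	if (prctl(PR_SET_TIMERSLACK, 1000000UL, 0, 0, 0))
		perror("PR_SET_TIMERSLACK");

	printf("timer slack is now %d ns\n", prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));
	return 0;
}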
1537 1537
1538 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 1538 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1539 /* Index of current stored address in ret_stack */ 1539 /* Index of current stored address in ret_stack */
1540 int curr_ret_stack; 1540 int curr_ret_stack;
1541 /* Stack of return addresses for return function tracing */ 1541 /* Stack of return addresses for return function tracing */
1542 struct ftrace_ret_stack *ret_stack; 1542 struct ftrace_ret_stack *ret_stack;
1543 /* time stamp for last schedule */ 1543 /* time stamp for last schedule */
1544 unsigned long long ftrace_timestamp; 1544 unsigned long long ftrace_timestamp;
1545 /* 1545 /*
1546 * Number of functions that haven't been traced 1546 * Number of functions that haven't been traced
1547 * because of depth overrun. 1547 * because of depth overrun.
1548 */ 1548 */
1549 atomic_t trace_overrun; 1549 atomic_t trace_overrun;
1550 /* Pause for the tracing */ 1550 /* Pause for the tracing */
1551 atomic_t tracing_graph_pause; 1551 atomic_t tracing_graph_pause;
1552 #endif 1552 #endif
1553 #ifdef CONFIG_TRACING 1553 #ifdef CONFIG_TRACING
1554 /* state flags for use by tracers */ 1554 /* state flags for use by tracers */
1555 unsigned long trace; 1555 unsigned long trace;
1556 /* bitmask and counter of trace recursion */ 1556 /* bitmask and counter of trace recursion */
1557 unsigned long trace_recursion; 1557 unsigned long trace_recursion;
1558 #endif /* CONFIG_TRACING */ 1558 #endif /* CONFIG_TRACING */
1559 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ 1559 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
1560 struct memcg_batch_info { 1560 struct memcg_batch_info {
1561 int do_batch; /* incremented when batch uncharge started */ 1561 int do_batch; /* incremented when batch uncharge started */
1562 struct mem_cgroup *memcg; /* target memcg of uncharge */ 1562 struct mem_cgroup *memcg; /* target memcg of uncharge */
1563 unsigned long nr_pages; /* uncharged usage */ 1563 unsigned long nr_pages; /* uncharged usage */
1564 unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ 1564 unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
1565 } memcg_batch; 1565 } memcg_batch;
1566 #endif 1566 #endif
1567 #ifdef CONFIG_HAVE_HW_BREAKPOINT 1567 #ifdef CONFIG_HAVE_HW_BREAKPOINT
1568 atomic_t ptrace_bp_refcnt; 1568 atomic_t ptrace_bp_refcnt;
1569 #endif 1569 #endif
1570 #ifdef CONFIG_UPROBES 1570 #ifdef CONFIG_UPROBES
1571 struct uprobe_task *utask; 1571 struct uprobe_task *utask;
1572 #endif 1572 #endif
1573 }; 1573 };
1574 1574
1575 /* Future-safe accessor for struct task_struct's cpus_allowed. */ 1575 /* Future-safe accessor for struct task_struct's cpus_allowed. */
1576 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) 1576 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
1577 1577
1578 /* 1578 /*
1579 * Priority of a process goes from 0..MAX_PRIO-1, valid RT 1579 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
1580 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH 1580 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
1581 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority 1581 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
1582 * values are inverted: lower p->prio value means higher priority. 1582 * values are inverted: lower p->prio value means higher priority.
1583 * 1583 *
1584 * The MAX_USER_RT_PRIO value allows the actual maximum 1584 * The MAX_USER_RT_PRIO value allows the actual maximum
1585 * RT priority to be separate from the value exported to 1585 * RT priority to be separate from the value exported to
1586 * user-space. This allows kernel threads to set their 1586 * user-space. This allows kernel threads to set their
1587 * priority to a value higher than any user task. Note: 1587 * priority to a value higher than any user task. Note:
1588 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. 1588 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
1589 */ 1589 */
1590 1590
1591 #define MAX_USER_RT_PRIO 100 1591 #define MAX_USER_RT_PRIO 100
1592 #define MAX_RT_PRIO MAX_USER_RT_PRIO 1592 #define MAX_RT_PRIO MAX_USER_RT_PRIO
1593 1593
1594 #define MAX_PRIO (MAX_RT_PRIO + 40) 1594 #define MAX_PRIO (MAX_RT_PRIO + 40)
1595 #define DEFAULT_PRIO (MAX_RT_PRIO + 20) 1595 #define DEFAULT_PRIO (MAX_RT_PRIO + 20)
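As a sanity check of these constants, a standalone sketch of how nice values land in the MAX_RT_PRIO..MAX_PRIO-1 band; the mapping mirrors the kernel's NICE_TO_PRIO() macro, and nice_to_prio() here is a made-up local helper:

#include <stdio.h>

#define MAX_USER_RT_PRIO	100
#define MAX_RT_PRIO		MAX_USER_RT_PRIO
#define MAX_PRIO		(MAX_RT_PRIO + 40)
#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)

static int nice_to_prio(int nice)	/* hypothetical local helper */
{
	return MAX_RT_PRIO + nice + 20;
}

int main(void)
{
	printf("nice -20 -> prio %d\n", nice_to_prio(-20));	/* 100 == MAX_RT_PRIO  */
	printf("nice   0 -> prio %d\n", nice_to_prio(0));	/* 120 == DEFAULT_PRIO */
	printf("nice  19 -> prio %d\n", nice_to_prio(19));	/* 139 == MAX_PRIO - 1 */
	return 0;
}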
1596 1596
1597 static inline int rt_prio(int prio) 1597 static inline int rt_prio(int prio)
1598 { 1598 {
1599 if (unlikely(prio < MAX_RT_PRIO)) 1599 if (unlikely(prio < MAX_RT_PRIO))
1600 return 1; 1600 return 1;
1601 return 0; 1601 return 0;
1602 } 1602 }
1603 1603
1604 static inline int rt_task(struct task_struct *p) 1604 static inline int rt_task(struct task_struct *p)
1605 { 1605 {
1606 return rt_prio(p->prio); 1606 return rt_prio(p->prio);
1607 } 1607 }
1608 1608
1609 static inline struct pid *task_pid(struct task_struct *task) 1609 static inline struct pid *task_pid(struct task_struct *task)
1610 { 1610 {
1611 return task->pids[PIDTYPE_PID].pid; 1611 return task->pids[PIDTYPE_PID].pid;
1612 } 1612 }
1613 1613
1614 static inline struct pid *task_tgid(struct task_struct *task) 1614 static inline struct pid *task_tgid(struct task_struct *task)
1615 { 1615 {
1616 return task->group_leader->pids[PIDTYPE_PID].pid; 1616 return task->group_leader->pids[PIDTYPE_PID].pid;
1617 } 1617 }
1618 1618
1619 /* 1619 /*
1620 * Without tasklist or rcu lock it is not safe to dereference 1620 * Without tasklist or rcu lock it is not safe to dereference
1621 * the result of task_pgrp/task_session even if task == current, 1621 * the result of task_pgrp/task_session even if task == current,
1622 * we can race with another thread doing sys_setsid/sys_setpgid. 1622 * we can race with another thread doing sys_setsid/sys_setpgid.
1623 */ 1623 */
1624 static inline struct pid *task_pgrp(struct task_struct *task) 1624 static inline struct pid *task_pgrp(struct task_struct *task)
1625 { 1625 {
1626 return task->group_leader->pids[PIDTYPE_PGID].pid; 1626 return task->group_leader->pids[PIDTYPE_PGID].pid;
1627 } 1627 }
1628 1628
1629 static inline struct pid *task_session(struct task_struct *task) 1629 static inline struct pid *task_session(struct task_struct *task)
1630 { 1630 {
1631 return task->group_leader->pids[PIDTYPE_SID].pid; 1631 return task->group_leader->pids[PIDTYPE_SID].pid;
1632 } 1632 }
1633 1633
1634 struct pid_namespace; 1634 struct pid_namespace;
1635 1635
1636 /* 1636 /*
1637 * the helpers to get the task's different pids as they are seen 1637 * the helpers to get the task's different pids as they are seen
1638 * from various namespaces 1638 * from various namespaces
1639 * 1639 *
1640 * task_xid_nr() : global id, i.e. the id seen from the init namespace; 1640 * task_xid_nr() : global id, i.e. the id seen from the init namespace;
1641 * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of 1641 * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of
1642 * current. 1642 * current.
1643 * task_xid_nr_ns() : id seen from the ns specified; 1643 * task_xid_nr_ns() : id seen from the ns specified;
1644 * 1644 *
1645 * set_task_vxid() : assigns a virtual id to a task; 1645 * set_task_vxid() : assigns a virtual id to a task;
1646 * 1646 *
1647 * see also pid_nr() etc in include/linux/pid.h 1647 * see also pid_nr() etc in include/linux/pid.h
1648 */ 1648 */
1649 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, 1649 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
1650 struct pid_namespace *ns); 1650 struct pid_namespace *ns);
1651 1651
1652 static inline pid_t task_pid_nr(struct task_struct *tsk) 1652 static inline pid_t task_pid_nr(struct task_struct *tsk)
1653 { 1653 {
1654 return tsk->pid; 1654 return tsk->pid;
1655 } 1655 }
1656 1656
1657 static inline pid_t task_pid_nr_ns(struct task_struct *tsk, 1657 static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
1658 struct pid_namespace *ns) 1658 struct pid_namespace *ns)
1659 { 1659 {
1660 return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); 1660 return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
1661 } 1661 }
1662 1662
1663 static inline pid_t task_pid_vnr(struct task_struct *tsk) 1663 static inline pid_t task_pid_vnr(struct task_struct *tsk)
1664 { 1664 {
1665 return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); 1665 return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
1666 } 1666 }
1667 1667
1668 1668
1669 static inline pid_t task_tgid_nr(struct task_struct *tsk) 1669 static inline pid_t task_tgid_nr(struct task_struct *tsk)
1670 { 1670 {
1671 return tsk->tgid; 1671 return tsk->tgid;
1672 } 1672 }
1673 1673
1674 pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); 1674 pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1675 1675
1676 static inline pid_t task_tgid_vnr(struct task_struct *tsk) 1676 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
1677 { 1677 {
1678 return pid_vnr(task_tgid(tsk)); 1678 return pid_vnr(task_tgid(tsk));
1679 } 1679 }
1680 1680
1681 1681
1682 static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, 1682 static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
1683 struct pid_namespace *ns) 1683 struct pid_namespace *ns)
1684 { 1684 {
1685 return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); 1685 return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
1686 } 1686 }
1687 1687
1688 static inline pid_t task_pgrp_vnr(struct task_struct *tsk) 1688 static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
1689 { 1689 {
1690 return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); 1690 return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
1691 } 1691 }
1692 1692
1693 1693
1694 static inline pid_t task_session_nr_ns(struct task_struct *tsk, 1694 static inline pid_t task_session_nr_ns(struct task_struct *tsk,
1695 struct pid_namespace *ns) 1695 struct pid_namespace *ns)
1696 { 1696 {
1697 return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); 1697 return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
1698 } 1698 }
1699 1699
1700 static inline pid_t task_session_vnr(struct task_struct *tsk) 1700 static inline pid_t task_session_vnr(struct task_struct *tsk)
1701 { 1701 {
1702 return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); 1702 return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
1703 } 1703 }
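A hedged kernel-side sketch of choosing between these helpers; report_tgids() is hypothetical and assumes task_active_pid_ns() from <linux/pid_namespace.h> plus the usual printk helpers are available:

static void report_tgids(struct task_struct *tsk)
{
	pid_t global = task_tgid_nr(tsk);	/* id seen from the init namespace      */
	pid_t local  = task_tgid_vnr(tsk);	/* id seen from current's pid namespace */
	pid_t in_ns  = task_tgid_nr_ns(tsk, task_active_pid_ns(current));

	pr_info("tgid: global=%d local=%d ns=%d\n", global, local, in_ns);
}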
1704 1704
1705 /* obsolete, do not use */ 1705 /* obsolete, do not use */
1706 static inline pid_t task_pgrp_nr(struct task_struct *tsk) 1706 static inline pid_t task_pgrp_nr(struct task_struct *tsk)
1707 { 1707 {
1708 return task_pgrp_nr_ns(tsk, &init_pid_ns); 1708 return task_pgrp_nr_ns(tsk, &init_pid_ns);
1709 } 1709 }
1710 1710
1711 /** 1711 /**
1712 * pid_alive - check that a task structure is not stale 1712 * pid_alive - check that a task structure is not stale
1713 * @p: Task structure to be checked. 1713 * @p: Task structure to be checked.
1714 * 1714 *
1715 * Test if a process is not yet dead (at most zombie state). 1715 * Test if a process is not yet dead (at most zombie state).
1716 * If pid_alive fails, then pointers within the task structure 1716 * If pid_alive fails, then pointers within the task structure
1717 * can be stale and must not be dereferenced. 1717 * can be stale and must not be dereferenced.
1718 */ 1718 */
1719 static inline int pid_alive(struct task_struct *p) 1719 static inline int pid_alive(struct task_struct *p)
1720 { 1720 {
1721 return p->pids[PIDTYPE_PID].pid != NULL; 1721 return p->pids[PIDTYPE_PID].pid != NULL;
1722 } 1722 }
1723 1723
1724 /** 1724 /**
1725 * is_global_init - check if a task structure is init 1725 * is_global_init - check if a task structure is init
1726 * @tsk: Task structure to be checked. 1726 * @tsk: Task structure to be checked.
1727 * 1727 *
1728 * Check if a task structure is the first user space task the kernel created. 1728 * Check if a task structure is the first user space task the kernel created.
1729 */ 1729 */
1730 static inline int is_global_init(struct task_struct *tsk) 1730 static inline int is_global_init(struct task_struct *tsk)
1731 { 1731 {
1732 return tsk->pid == 1; 1732 return tsk->pid == 1;
1733 } 1733 }
1734 1734
1735 /* 1735 /*
1736 * is_container_init: 1736 * is_container_init:
1737 * check whether the task is init in its own pid namespace. 1737 * check whether the task is init in its own pid namespace.
1738 */ 1738 */
1739 extern int is_container_init(struct task_struct *tsk); 1739 extern int is_container_init(struct task_struct *tsk);
1740 1740
1741 extern struct pid *cad_pid; 1741 extern struct pid *cad_pid;
1742 1742
1743 extern void free_task(struct task_struct *tsk); 1743 extern void free_task(struct task_struct *tsk);
1744 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) 1744 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
1745 1745
1746 extern void __put_task_struct(struct task_struct *t); 1746 extern void __put_task_struct(struct task_struct *t);
1747 1747
1748 static inline void put_task_struct(struct task_struct *t) 1748 static inline void put_task_struct(struct task_struct *t)
1749 { 1749 {
1750 if (atomic_dec_and_test(&t->usage)) 1750 if (atomic_dec_and_test(&t->usage))
1751 __put_task_struct(t); 1751 __put_task_struct(t);
1752 } 1752 }
1753 1753
1754 extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); 1754 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
1755 extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); 1755 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
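After the rename, callers that want the scaled, monotonicity-bounded per-group totals use the new name; a minimal sketch modelled loosely on do_sys_times() in kernel/sys.c (sample_group_cputime() itself is hypothetical):

static void sample_group_cputime(struct task_struct *p)
{
	cputime_t utime, stime;

	thread_group_cputime_adjusted(p, &utime, &stime);
	pr_debug("adjusted group cputime: utime=%llu stime=%llu\n",
		 (unsigned long long)utime, (unsigned long long)stime);
}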
1756 1756
1757 /* 1757 /*
1758 * Per process flags 1758 * Per process flags
1759 */ 1759 */
1760 #define PF_EXITING 0x00000004 /* getting shut down */ 1760 #define PF_EXITING 0x00000004 /* getting shut down */
1761 #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 1761 #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
1762 #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ 1762 #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
1763 #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ 1763 #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
1764 #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ 1764 #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
1765 #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ 1765 #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
1766 #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ 1766 #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
1767 #define PF_DUMPCORE 0x00000200 /* dumped core */ 1767 #define PF_DUMPCORE 0x00000200 /* dumped core */
1768 #define PF_SIGNALED 0x00000400 /* killed by a signal */ 1768 #define PF_SIGNALED 0x00000400 /* killed by a signal */
1769 #define PF_MEMALLOC 0x00000800 /* Allocating memory */ 1769 #define PF_MEMALLOC 0x00000800 /* Allocating memory */
1770 #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ 1770 #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */
1771 #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ 1771 #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
1772 #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ 1772 #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
1773 #define PF_FROZEN 0x00010000 /* frozen for system suspend */ 1773 #define PF_FROZEN 0x00010000 /* frozen for system suspend */
1774 #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ 1774 #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
1775 #define PF_KSWAPD 0x00040000 /* I am kswapd */ 1775 #define PF_KSWAPD 0x00040000 /* I am kswapd */
1776 #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ 1776 #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1777 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 1777 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1778 #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ 1778 #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
1779 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ 1779 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1780 #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ 1780 #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
1781 #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ 1781 #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
1782 #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ 1782 #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
1783 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ 1783 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1784 #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ 1784 #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
1785 #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ 1785 #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1786 #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ 1786 #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
1787 1787
1788 /* 1788 /*
1789 * Only the _current_ task can read/write to tsk->flags, but other 1789 * Only the _current_ task can read/write to tsk->flags, but other
1790 * tasks can access tsk->flags in readonly mode for example 1790 * tasks can access tsk->flags in readonly mode for example
1791 * with tsk_used_math (like during threaded core dumping). 1791 * with tsk_used_math (like during threaded core dumping).
1792 * There is however an exception to this rule during ptrace 1792 * There is however an exception to this rule during ptrace
1793 * or during fork: the ptracer task is allowed to write to the 1793 * or during fork: the ptracer task is allowed to write to the
1794 * child->flags of its traced child (same goes for fork, the parent 1794 * child->flags of its traced child (same goes for fork, the parent
1795 * can write to the child->flags), because we're guaranteed the 1795 * can write to the child->flags), because we're guaranteed the
1796 * child is not running and in turn not changing child->flags 1796 * child is not running and in turn not changing child->flags
1797 * at the same time the parent does it. 1797 * at the same time the parent does it.
1798 */ 1798 */
1799 #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) 1799 #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
1800 #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) 1800 #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
1801 #define clear_used_math() clear_stopped_child_used_math(current) 1801 #define clear_used_math() clear_stopped_child_used_math(current)
1802 #define set_used_math() set_stopped_child_used_math(current) 1802 #define set_used_math() set_stopped_child_used_math(current)
1803 #define conditional_stopped_child_used_math(condition, child) \ 1803 #define conditional_stopped_child_used_math(condition, child) \
1804 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) 1804 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
1805 #define conditional_used_math(condition) \ 1805 #define conditional_used_math(condition) \
1806 conditional_stopped_child_used_math(condition, current) 1806 conditional_stopped_child_used_math(condition, current)
1807 #define copy_to_stopped_child_used_math(child) \ 1807 #define copy_to_stopped_child_used_math(child) \
1808 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) 1808 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
1809 /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ 1809 /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
1810 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) 1810 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1811 #define used_math() tsk_used_math(current) 1811 #define used_math() tsk_used_math(current)
1812 1812
1813 /* 1813 /*
1814 * task->jobctl flags 1814 * task->jobctl flags
1815 */ 1815 */
1816 #define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ 1816 #define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */
1817 1817
1818 #define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ 1818 #define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */
1819 #define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ 1819 #define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */
1820 #define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ 1820 #define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */
1821 #define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ 1821 #define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */
1822 #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ 1822 #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */
1823 #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ 1823 #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
1824 #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ 1824 #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
1825 1825
1826 #define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) 1826 #define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT)
1827 #define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) 1827 #define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT)
1828 #define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) 1828 #define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT)
1829 #define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) 1829 #define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT)
1830 #define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) 1830 #define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT)
1831 #define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) 1831 #define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT)
1832 #define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) 1832 #define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT)
1833 1833
1834 #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) 1834 #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
1835 #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) 1835 #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
1836 1836
1837 extern bool task_set_jobctl_pending(struct task_struct *task, 1837 extern bool task_set_jobctl_pending(struct task_struct *task,
1838 unsigned int mask); 1838 unsigned int mask);
1839 extern void task_clear_jobctl_trapping(struct task_struct *task); 1839 extern void task_clear_jobctl_trapping(struct task_struct *task);
1840 extern void task_clear_jobctl_pending(struct task_struct *task, 1840 extern void task_clear_jobctl_pending(struct task_struct *task,
1841 unsigned int mask); 1841 unsigned int mask);
1842 1842
1843 #ifdef CONFIG_PREEMPT_RCU 1843 #ifdef CONFIG_PREEMPT_RCU
1844 1844
1845 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1845 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1846 #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ 1846 #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
1847 1847
1848 static inline void rcu_copy_process(struct task_struct *p) 1848 static inline void rcu_copy_process(struct task_struct *p)
1849 { 1849 {
1850 p->rcu_read_lock_nesting = 0; 1850 p->rcu_read_lock_nesting = 0;
1851 p->rcu_read_unlock_special = 0; 1851 p->rcu_read_unlock_special = 0;
1852 #ifdef CONFIG_TREE_PREEMPT_RCU 1852 #ifdef CONFIG_TREE_PREEMPT_RCU
1853 p->rcu_blocked_node = NULL; 1853 p->rcu_blocked_node = NULL;
1854 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1854 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1855 #ifdef CONFIG_RCU_BOOST 1855 #ifdef CONFIG_RCU_BOOST
1856 p->rcu_boost_mutex = NULL; 1856 p->rcu_boost_mutex = NULL;
1857 #endif /* #ifdef CONFIG_RCU_BOOST */ 1857 #endif /* #ifdef CONFIG_RCU_BOOST */
1858 INIT_LIST_HEAD(&p->rcu_node_entry); 1858 INIT_LIST_HEAD(&p->rcu_node_entry);
1859 } 1859 }
1860 1860
1861 #else 1861 #else
1862 1862
1863 static inline void rcu_copy_process(struct task_struct *p) 1863 static inline void rcu_copy_process(struct task_struct *p)
1864 { 1864 {
1865 } 1865 }
1866 1866
1867 #endif 1867 #endif
1868 1868
1869 static inline void rcu_switch(struct task_struct *prev, 1869 static inline void rcu_switch(struct task_struct *prev,
1870 struct task_struct *next) 1870 struct task_struct *next)
1871 { 1871 {
1872 #ifdef CONFIG_RCU_USER_QS 1872 #ifdef CONFIG_RCU_USER_QS
1873 rcu_user_hooks_switch(prev, next); 1873 rcu_user_hooks_switch(prev, next);
1874 #endif 1874 #endif
1875 } 1875 }
1876 1876
1877 static inline void tsk_restore_flags(struct task_struct *task, 1877 static inline void tsk_restore_flags(struct task_struct *task,
1878 unsigned long orig_flags, unsigned long flags) 1878 unsigned long orig_flags, unsigned long flags)
1879 { 1879 {
1880 task->flags &= ~flags; 1880 task->flags &= ~flags;
1881 task->flags |= orig_flags & flags; 1881 task->flags |= orig_flags & flags;
1882 } 1882 }
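tsk_restore_flags() exists so a flag can be set temporarily without clobbering a caller that already had it set; a hedged sketch of the usual PF_MEMALLOC pattern (do_emergency_alloc() is hypothetical):

static int with_memalloc(void)
{
	unsigned long pflags = current->flags;
	int ret;

	current->flags |= PF_MEMALLOC;		/* allow dipping into reserves */
	ret = do_emergency_alloc();		/* hypothetical allocation path */
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}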
1883 1883
1884 #ifdef CONFIG_SMP 1884 #ifdef CONFIG_SMP
1885 extern void do_set_cpus_allowed(struct task_struct *p, 1885 extern void do_set_cpus_allowed(struct task_struct *p,
1886 const struct cpumask *new_mask); 1886 const struct cpumask *new_mask);
1887 1887
1888 extern int set_cpus_allowed_ptr(struct task_struct *p, 1888 extern int set_cpus_allowed_ptr(struct task_struct *p,
1889 const struct cpumask *new_mask); 1889 const struct cpumask *new_mask);
1890 #else 1890 #else
1891 static inline void do_set_cpus_allowed(struct task_struct *p, 1891 static inline void do_set_cpus_allowed(struct task_struct *p,
1892 const struct cpumask *new_mask) 1892 const struct cpumask *new_mask)
1893 { 1893 {
1894 } 1894 }
1895 static inline int set_cpus_allowed_ptr(struct task_struct *p, 1895 static inline int set_cpus_allowed_ptr(struct task_struct *p,
1896 const struct cpumask *new_mask) 1896 const struct cpumask *new_mask)
1897 { 1897 {
1898 if (!cpumask_test_cpu(0, new_mask)) 1898 if (!cpumask_test_cpu(0, new_mask))
1899 return -EINVAL; 1899 return -EINVAL;
1900 return 0; 1900 return 0;
1901 } 1901 }
1902 #endif 1902 #endif
1903 1903
1904 #ifdef CONFIG_NO_HZ 1904 #ifdef CONFIG_NO_HZ
1905 void calc_load_enter_idle(void); 1905 void calc_load_enter_idle(void);
1906 void calc_load_exit_idle(void); 1906 void calc_load_exit_idle(void);
1907 #else 1907 #else
1908 static inline void calc_load_enter_idle(void) { } 1908 static inline void calc_load_enter_idle(void) { }
1909 static inline void calc_load_exit_idle(void) { } 1909 static inline void calc_load_exit_idle(void) { }
1910 #endif /* CONFIG_NO_HZ */ 1910 #endif /* CONFIG_NO_HZ */
1911 1911
1912 #ifndef CONFIG_CPUMASK_OFFSTACK 1912 #ifndef CONFIG_CPUMASK_OFFSTACK
1913 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) 1913 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1914 { 1914 {
1915 return set_cpus_allowed_ptr(p, &new_mask); 1915 return set_cpus_allowed_ptr(p, &new_mask);
1916 } 1916 }
1917 #endif 1917 #endif
1918 1918
1919 /* 1919 /*
1920 * Do not use outside of architecture code which knows its limitations. 1920 * Do not use outside of architecture code which knows its limitations.
1921 * 1921 *
1922 * sched_clock() has no promise of monotonicity or bounded drift between 1922 * sched_clock() has no promise of monotonicity or bounded drift between
1923 * CPUs, and using it (which you should not) requires disabling IRQs. 1923 * CPUs, and using it (which you should not) requires disabling IRQs.
1924 * 1924 *
1925 * Please use one of the three interfaces below. 1925 * Please use one of the three interfaces below.
1926 */ 1926 */
1927 extern unsigned long long notrace sched_clock(void); 1927 extern unsigned long long notrace sched_clock(void);
1928 /* 1928 /*
1929 * See the comment in kernel/sched/clock.c 1929 * See the comment in kernel/sched/clock.c
1930 */ 1930 */
1931 extern u64 cpu_clock(int cpu); 1931 extern u64 cpu_clock(int cpu);
1932 extern u64 local_clock(void); 1932 extern u64 local_clock(void);
1933 extern u64 sched_clock_cpu(int cpu); 1933 extern u64 sched_clock_cpu(int cpu);
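Of the three interfaces above, local_clock() is the usual choice for cheap, CPU-local nanosecond timestamps; a hedged sketch (do_the_work() is hypothetical):

static void time_the_work(void)
{
	u64 start = local_clock();

	do_the_work();				/* hypothetical workload */

	pr_info("work took %llu ns\n",
		(unsigned long long)(local_clock() - start));
}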
1934 1934
1935 1935
1936 extern void sched_clock_init(void); 1936 extern void sched_clock_init(void);
1937 1937
1938 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 1938 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1939 static inline void sched_clock_tick(void) 1939 static inline void sched_clock_tick(void)
1940 { 1940 {
1941 } 1941 }
1942 1942
1943 static inline void sched_clock_idle_sleep_event(void) 1943 static inline void sched_clock_idle_sleep_event(void)
1944 { 1944 {
1945 } 1945 }
1946 1946
1947 static inline void sched_clock_idle_wakeup_event(u64 delta_ns) 1947 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
1948 { 1948 {
1949 } 1949 }
1950 #else 1950 #else
1951 /* 1951 /*
1952 * Architectures can set this to 1 if they have specified 1952 * Architectures can set this to 1 if they have specified
1953 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, 1953 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
1954 * but then during bootup it turns out that sched_clock() 1954 * but then during bootup it turns out that sched_clock()
1955 * is reliable after all: 1955 * is reliable after all:
1956 */ 1956 */
1957 extern int sched_clock_stable; 1957 extern int sched_clock_stable;
1958 1958
1959 extern void sched_clock_tick(void); 1959 extern void sched_clock_tick(void);
1960 extern void sched_clock_idle_sleep_event(void); 1960 extern void sched_clock_idle_sleep_event(void);
1961 extern void sched_clock_idle_wakeup_event(u64 delta_ns); 1961 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1962 #endif 1962 #endif
1963 1963
1964 #ifdef CONFIG_IRQ_TIME_ACCOUNTING 1964 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
1965 /* 1965 /*
1966 * An interface for runtime opt-in to IRQ time accounting based on sched_clock. 1966 * An interface for runtime opt-in to IRQ time accounting based on sched_clock.
1967 * The explicit opt-in avoids a performance penalty on systems with 1967 * The explicit opt-in avoids a performance penalty on systems with
1968 * slow sched_clock implementations. 1968 * slow sched_clock implementations.
1969 */ 1969 */
1970 extern void enable_sched_clock_irqtime(void); 1970 extern void enable_sched_clock_irqtime(void);
1971 extern void disable_sched_clock_irqtime(void); 1971 extern void disable_sched_clock_irqtime(void);
1972 #else 1972 #else
1973 static inline void enable_sched_clock_irqtime(void) {} 1973 static inline void enable_sched_clock_irqtime(void) {}
1974 static inline void disable_sched_clock_irqtime(void) {} 1974 static inline void disable_sched_clock_irqtime(void) {}
1975 #endif 1975 #endif
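As a sketch only (not part of this change): code that has established its sched_clock() is cheap could opt in during early boot roughly as below. The initcall name is hypothetical; in practice the call is made wherever the platform decides its clock is suitable.

static int __init example_irqtime_setup(void)	/* hypothetical */
{
	/* Opt in to IRQ time accounting; assumes sched_clock() is fast here. */
	enable_sched_clock_irqtime();
	return 0;
}
early_initcall(example_irqtime_setup);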
1976 1976
1977 extern unsigned long long 1977 extern unsigned long long
1978 task_sched_runtime(struct task_struct *task); 1978 task_sched_runtime(struct task_struct *task);
1979 1979
1980 /* sched_exec is called by processes performing an exec */ 1980 /* sched_exec is called by processes performing an exec */
1981 #ifdef CONFIG_SMP 1981 #ifdef CONFIG_SMP
1982 extern void sched_exec(void); 1982 extern void sched_exec(void);
1983 #else 1983 #else
1984 #define sched_exec() {} 1984 #define sched_exec() {}
1985 #endif 1985 #endif
1986 1986
1987 extern void sched_clock_idle_sleep_event(void); 1987 extern void sched_clock_idle_sleep_event(void);
1988 extern void sched_clock_idle_wakeup_event(u64 delta_ns); 1988 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1989 1989
1990 #ifdef CONFIG_HOTPLUG_CPU 1990 #ifdef CONFIG_HOTPLUG_CPU
1991 extern void idle_task_exit(void); 1991 extern void idle_task_exit(void);
1992 #else 1992 #else
1993 static inline void idle_task_exit(void) {} 1993 static inline void idle_task_exit(void) {}
1994 #endif 1994 #endif
1995 1995
1996 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) 1996 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
1997 extern void wake_up_idle_cpu(int cpu); 1997 extern void wake_up_idle_cpu(int cpu);
1998 #else 1998 #else
1999 static inline void wake_up_idle_cpu(int cpu) { } 1999 static inline void wake_up_idle_cpu(int cpu) { }
2000 #endif 2000 #endif
2001 2001
2002 extern unsigned int sysctl_sched_latency; 2002 extern unsigned int sysctl_sched_latency;
2003 extern unsigned int sysctl_sched_min_granularity; 2003 extern unsigned int sysctl_sched_min_granularity;
2004 extern unsigned int sysctl_sched_wakeup_granularity; 2004 extern unsigned int sysctl_sched_wakeup_granularity;
2005 extern unsigned int sysctl_sched_child_runs_first; 2005 extern unsigned int sysctl_sched_child_runs_first;
2006 2006
2007 enum sched_tunable_scaling { 2007 enum sched_tunable_scaling {
2008 SCHED_TUNABLESCALING_NONE, 2008 SCHED_TUNABLESCALING_NONE,
2009 SCHED_TUNABLESCALING_LOG, 2009 SCHED_TUNABLESCALING_LOG,
2010 SCHED_TUNABLESCALING_LINEAR, 2010 SCHED_TUNABLESCALING_LINEAR,
2011 SCHED_TUNABLESCALING_END, 2011 SCHED_TUNABLESCALING_END,
2012 }; 2012 };
2013 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; 2013 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
2014 2014
2015 #ifdef CONFIG_SCHED_DEBUG 2015 #ifdef CONFIG_SCHED_DEBUG
2016 extern unsigned int sysctl_sched_migration_cost; 2016 extern unsigned int sysctl_sched_migration_cost;
2017 extern unsigned int sysctl_sched_nr_migrate; 2017 extern unsigned int sysctl_sched_nr_migrate;
2018 extern unsigned int sysctl_sched_time_avg; 2018 extern unsigned int sysctl_sched_time_avg;
2019 extern unsigned int sysctl_timer_migration; 2019 extern unsigned int sysctl_timer_migration;
2020 extern unsigned int sysctl_sched_shares_window; 2020 extern unsigned int sysctl_sched_shares_window;
2021 2021
2022 int sched_proc_update_handler(struct ctl_table *table, int write, 2022 int sched_proc_update_handler(struct ctl_table *table, int write,
2023 void __user *buffer, size_t *length, 2023 void __user *buffer, size_t *length,
2024 loff_t *ppos); 2024 loff_t *ppos);
2025 #endif 2025 #endif
2026 #ifdef CONFIG_SCHED_DEBUG 2026 #ifdef CONFIG_SCHED_DEBUG
2027 static inline unsigned int get_sysctl_timer_migration(void) 2027 static inline unsigned int get_sysctl_timer_migration(void)
2028 { 2028 {
2029 return sysctl_timer_migration; 2029 return sysctl_timer_migration;
2030 } 2030 }
2031 #else 2031 #else
2032 static inline unsigned int get_sysctl_timer_migration(void) 2032 static inline unsigned int get_sysctl_timer_migration(void)
2033 { 2033 {
2034 return 1; 2034 return 1;
2035 } 2035 }
2036 #endif 2036 #endif
2037 extern unsigned int sysctl_sched_rt_period; 2037 extern unsigned int sysctl_sched_rt_period;
2038 extern int sysctl_sched_rt_runtime; 2038 extern int sysctl_sched_rt_runtime;
2039 2039
2040 int sched_rt_handler(struct ctl_table *table, int write, 2040 int sched_rt_handler(struct ctl_table *table, int write,
2041 void __user *buffer, size_t *lenp, 2041 void __user *buffer, size_t *lenp,
2042 loff_t *ppos); 2042 loff_t *ppos);
2043 2043
2044 #ifdef CONFIG_SCHED_AUTOGROUP 2044 #ifdef CONFIG_SCHED_AUTOGROUP
2045 extern unsigned int sysctl_sched_autogroup_enabled; 2045 extern unsigned int sysctl_sched_autogroup_enabled;
2046 2046
2047 extern void sched_autogroup_create_attach(struct task_struct *p); 2047 extern void sched_autogroup_create_attach(struct task_struct *p);
2048 extern void sched_autogroup_detach(struct task_struct *p); 2048 extern void sched_autogroup_detach(struct task_struct *p);
2049 extern void sched_autogroup_fork(struct signal_struct *sig); 2049 extern void sched_autogroup_fork(struct signal_struct *sig);
2050 extern void sched_autogroup_exit(struct signal_struct *sig); 2050 extern void sched_autogroup_exit(struct signal_struct *sig);
2051 #ifdef CONFIG_PROC_FS 2051 #ifdef CONFIG_PROC_FS
2052 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); 2052 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
2053 extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); 2053 extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
2054 #endif 2054 #endif
2055 #else 2055 #else
2056 static inline void sched_autogroup_create_attach(struct task_struct *p) { } 2056 static inline void sched_autogroup_create_attach(struct task_struct *p) { }
2057 static inline void sched_autogroup_detach(struct task_struct *p) { } 2057 static inline void sched_autogroup_detach(struct task_struct *p) { }
2058 static inline void sched_autogroup_fork(struct signal_struct *sig) { } 2058 static inline void sched_autogroup_fork(struct signal_struct *sig) { }
2059 static inline void sched_autogroup_exit(struct signal_struct *sig) { } 2059 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
2060 #endif 2060 #endif
2061 2061
2062 #ifdef CONFIG_CFS_BANDWIDTH 2062 #ifdef CONFIG_CFS_BANDWIDTH
2063 extern unsigned int sysctl_sched_cfs_bandwidth_slice; 2063 extern unsigned int sysctl_sched_cfs_bandwidth_slice;
2064 #endif 2064 #endif
2065 2065
2066 #ifdef CONFIG_RT_MUTEXES 2066 #ifdef CONFIG_RT_MUTEXES
2067 extern int rt_mutex_getprio(struct task_struct *p); 2067 extern int rt_mutex_getprio(struct task_struct *p);
2068 extern void rt_mutex_setprio(struct task_struct *p, int prio); 2068 extern void rt_mutex_setprio(struct task_struct *p, int prio);
2069 extern void rt_mutex_adjust_pi(struct task_struct *p); 2069 extern void rt_mutex_adjust_pi(struct task_struct *p);
2070 static inline bool tsk_is_pi_blocked(struct task_struct *tsk) 2070 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
2071 { 2071 {
2072 return tsk->pi_blocked_on != NULL; 2072 return tsk->pi_blocked_on != NULL;
2073 } 2073 }
2074 #else 2074 #else
2075 static inline int rt_mutex_getprio(struct task_struct *p) 2075 static inline int rt_mutex_getprio(struct task_struct *p)
2076 { 2076 {
2077 return p->normal_prio; 2077 return p->normal_prio;
2078 } 2078 }
2079 # define rt_mutex_adjust_pi(p) do { } while (0) 2079 # define rt_mutex_adjust_pi(p) do { } while (0)
2080 static inline bool tsk_is_pi_blocked(struct task_struct *tsk) 2080 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
2081 { 2081 {
2082 return false; 2082 return false;
2083 } 2083 }
2084 #endif 2084 #endif
2085 2085
2086 extern bool yield_to(struct task_struct *p, bool preempt); 2086 extern bool yield_to(struct task_struct *p, bool preempt);
2087 extern void set_user_nice(struct task_struct *p, long nice); 2087 extern void set_user_nice(struct task_struct *p, long nice);
2088 extern int task_prio(const struct task_struct *p); 2088 extern int task_prio(const struct task_struct *p);
2089 extern int task_nice(const struct task_struct *p); 2089 extern int task_nice(const struct task_struct *p);
2090 extern int can_nice(const struct task_struct *p, const int nice); 2090 extern int can_nice(const struct task_struct *p, const int nice);
2091 extern int task_curr(const struct task_struct *p); 2091 extern int task_curr(const struct task_struct *p);
2092 extern int idle_cpu(int cpu); 2092 extern int idle_cpu(int cpu);
2093 extern int sched_setscheduler(struct task_struct *, int, 2093 extern int sched_setscheduler(struct task_struct *, int,
2094 const struct sched_param *); 2094 const struct sched_param *);
2095 extern int sched_setscheduler_nocheck(struct task_struct *, int, 2095 extern int sched_setscheduler_nocheck(struct task_struct *, int,
2096 const struct sched_param *); 2096 const struct sched_param *);
2097 extern struct task_struct *idle_task(int cpu); 2097 extern struct task_struct *idle_task(int cpu);
2098 /** 2098 /**
2099 * is_idle_task - is the specified task an idle task? 2099 * is_idle_task - is the specified task an idle task?
2100 * @p: the task in question. 2100 * @p: the task in question.
2101 */ 2101 */
2102 static inline bool is_idle_task(const struct task_struct *p) 2102 static inline bool is_idle_task(const struct task_struct *p)
2103 { 2103 {
2104 return p->pid == 0; 2104 return p->pid == 0;
2105 } 2105 }
2106 extern struct task_struct *curr_task(int cpu); 2106 extern struct task_struct *curr_task(int cpu);
2107 extern void set_curr_task(int cpu, struct task_struct *p); 2107 extern void set_curr_task(int cpu, struct task_struct *p);
2108 2108
2109 void yield(void); 2109 void yield(void);
2110 2110
2111 /* 2111 /*
2112 * The default (Linux) execution domain. 2112 * The default (Linux) execution domain.
2113 */ 2113 */
2114 extern struct exec_domain default_exec_domain; 2114 extern struct exec_domain default_exec_domain;
2115 2115
2116 union thread_union { 2116 union thread_union {
2117 struct thread_info thread_info; 2117 struct thread_info thread_info;
2118 unsigned long stack[THREAD_SIZE/sizeof(long)]; 2118 unsigned long stack[THREAD_SIZE/sizeof(long)];
2119 }; 2119 };
2120 2120
2121 #ifndef __HAVE_ARCH_KSTACK_END 2121 #ifndef __HAVE_ARCH_KSTACK_END
2122 static inline int kstack_end(void *addr) 2122 static inline int kstack_end(void *addr)
2123 { 2123 {
2124 /* Reliable end of stack detection: 2124 /* Reliable end of stack detection:
2125 * Some APM BIOS versions misalign the stack 2125 * Some APM BIOS versions misalign the stack
2126 */ 2126 */
2127 return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); 2127 return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
2128 } 2128 }
2129 #endif 2129 #endif
2130 2130
2131 extern union thread_union init_thread_union; 2131 extern union thread_union init_thread_union;
2132 extern struct task_struct init_task; 2132 extern struct task_struct init_task;
2133 2133
2134 extern struct mm_struct init_mm; 2134 extern struct mm_struct init_mm;
2135 2135
2136 extern struct pid_namespace init_pid_ns; 2136 extern struct pid_namespace init_pid_ns;
2137 2137
2138 /* 2138 /*
2139 * find a task by one of its numerical ids 2139 * find a task by one of its numerical ids
2140 * 2140 *
2141 * find_task_by_pid_ns(): 2141 * find_task_by_pid_ns():
2142 * finds a task by its pid in the specified namespace 2142 * finds a task by its pid in the specified namespace
2143 * find_task_by_vpid(): 2143 * find_task_by_vpid():
2144 * finds a task by its virtual pid 2144 * finds a task by its virtual pid
2145 * 2145 *
2146 * see also find_vpid() etc in include/linux/pid.h 2146 * see also find_vpid() etc in include/linux/pid.h
2147 */ 2147 */
2148 2148
2149 extern struct task_struct *find_task_by_vpid(pid_t nr); 2149 extern struct task_struct *find_task_by_vpid(pid_t nr);
2150 extern struct task_struct *find_task_by_pid_ns(pid_t nr, 2150 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
2151 struct pid_namespace *ns); 2151 struct pid_namespace *ns);
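A minimal sketch of the usual RCU pattern around find_task_by_vpid(), assuming the caller wants to keep the task pinned after the lookup; the wrapper name is hypothetical.

static struct task_struct *example_get_task_by_vpid(pid_t vpid)
{
	struct task_struct *p;

	rcu_read_lock();
	p = find_task_by_vpid(vpid);
	if (p)
		get_task_struct(p);	/* pin it before leaving the RCU section */
	rcu_read_unlock();

	return p;	/* caller must put_task_struct() when done */
}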
2152 2152
2153 extern void __set_special_pids(struct pid *pid); 2153 extern void __set_special_pids(struct pid *pid);
2154 2154
2155 /* per-UID process charging. */ 2155 /* per-UID process charging. */
2156 extern struct user_struct * alloc_uid(kuid_t); 2156 extern struct user_struct * alloc_uid(kuid_t);
2157 static inline struct user_struct *get_uid(struct user_struct *u) 2157 static inline struct user_struct *get_uid(struct user_struct *u)
2158 { 2158 {
2159 atomic_inc(&u->__count); 2159 atomic_inc(&u->__count);
2160 return u; 2160 return u;
2161 } 2161 }
2162 extern void free_uid(struct user_struct *); 2162 extern void free_uid(struct user_struct *);
2163 2163
2164 #include <asm/current.h> 2164 #include <asm/current.h>
2165 2165
2166 extern void xtime_update(unsigned long ticks); 2166 extern void xtime_update(unsigned long ticks);
2167 2167
2168 extern int wake_up_state(struct task_struct *tsk, unsigned int state); 2168 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
2169 extern int wake_up_process(struct task_struct *tsk); 2169 extern int wake_up_process(struct task_struct *tsk);
2170 extern void wake_up_new_task(struct task_struct *tsk); 2170 extern void wake_up_new_task(struct task_struct *tsk);
2171 #ifdef CONFIG_SMP 2171 #ifdef CONFIG_SMP
2172 extern void kick_process(struct task_struct *tsk); 2172 extern void kick_process(struct task_struct *tsk);
2173 #else 2173 #else
2174 static inline void kick_process(struct task_struct *tsk) { } 2174 static inline void kick_process(struct task_struct *tsk) { }
2175 #endif 2175 #endif
2176 extern void sched_fork(struct task_struct *p); 2176 extern void sched_fork(struct task_struct *p);
2177 extern void sched_dead(struct task_struct *p); 2177 extern void sched_dead(struct task_struct *p);
2178 2178
2179 extern void proc_caches_init(void); 2179 extern void proc_caches_init(void);
2180 extern void flush_signals(struct task_struct *); 2180 extern void flush_signals(struct task_struct *);
2181 extern void __flush_signals(struct task_struct *); 2181 extern void __flush_signals(struct task_struct *);
2182 extern void ignore_signals(struct task_struct *); 2182 extern void ignore_signals(struct task_struct *);
2183 extern void flush_signal_handlers(struct task_struct *, int force_default); 2183 extern void flush_signal_handlers(struct task_struct *, int force_default);
2184 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); 2184 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
2185 2185
2186 static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) 2186 static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
2187 { 2187 {
2188 unsigned long flags; 2188 unsigned long flags;
2189 int ret; 2189 int ret;
2190 2190
2191 spin_lock_irqsave(&tsk->sighand->siglock, flags); 2191 spin_lock_irqsave(&tsk->sighand->siglock, flags);
2192 ret = dequeue_signal(tsk, mask, info); 2192 ret = dequeue_signal(tsk, mask, info);
2193 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2193 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
2194 2194
2195 return ret; 2195 return ret;
2196 } 2196 }
2197 2197
2198 extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2198 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
2199 sigset_t *mask); 2199 sigset_t *mask);
2200 extern void unblock_all_signals(void); 2200 extern void unblock_all_signals(void);
2201 extern void release_task(struct task_struct * p); 2201 extern void release_task(struct task_struct * p);
2202 extern int send_sig_info(int, struct siginfo *, struct task_struct *); 2202 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
2203 extern int force_sigsegv(int, struct task_struct *); 2203 extern int force_sigsegv(int, struct task_struct *);
2204 extern int force_sig_info(int, struct siginfo *, struct task_struct *); 2204 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
2205 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); 2205 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
2206 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); 2206 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
2207 extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, 2207 extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
2208 const struct cred *, u32); 2208 const struct cred *, u32);
2209 extern int kill_pgrp(struct pid *pid, int sig, int priv); 2209 extern int kill_pgrp(struct pid *pid, int sig, int priv);
2210 extern int kill_pid(struct pid *pid, int sig, int priv); 2210 extern int kill_pid(struct pid *pid, int sig, int priv);
2211 extern int kill_proc_info(int, struct siginfo *, pid_t); 2211 extern int kill_proc_info(int, struct siginfo *, pid_t);
2212 extern __must_check bool do_notify_parent(struct task_struct *, int); 2212 extern __must_check bool do_notify_parent(struct task_struct *, int);
2213 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); 2213 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
2214 extern void force_sig(int, struct task_struct *); 2214 extern void force_sig(int, struct task_struct *);
2215 extern int send_sig(int, struct task_struct *, int); 2215 extern int send_sig(int, struct task_struct *, int);
2216 extern int zap_other_threads(struct task_struct *p); 2216 extern int zap_other_threads(struct task_struct *p);
2217 extern struct sigqueue *sigqueue_alloc(void); 2217 extern struct sigqueue *sigqueue_alloc(void);
2218 extern void sigqueue_free(struct sigqueue *); 2218 extern void sigqueue_free(struct sigqueue *);
2219 extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); 2219 extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
2220 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); 2220 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
2221 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); 2221 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
2222 2222
2223 static inline void restore_saved_sigmask(void) 2223 static inline void restore_saved_sigmask(void)
2224 { 2224 {
2225 if (test_and_clear_restore_sigmask()) 2225 if (test_and_clear_restore_sigmask())
2226 __set_current_blocked(&current->saved_sigmask); 2226 __set_current_blocked(&current->saved_sigmask);
2227 } 2227 }
2228 2228
2229 static inline sigset_t *sigmask_to_save(void) 2229 static inline sigset_t *sigmask_to_save(void)
2230 { 2230 {
2231 sigset_t *res = &current->blocked; 2231 sigset_t *res = &current->blocked;
2232 if (unlikely(test_restore_sigmask())) 2232 if (unlikely(test_restore_sigmask()))
2233 res = &current->saved_sigmask; 2233 res = &current->saved_sigmask;
2234 return res; 2234 return res;
2235 } 2235 }
2236 2236
2237 static inline int kill_cad_pid(int sig, int priv) 2237 static inline int kill_cad_pid(int sig, int priv)
2238 { 2238 {
2239 return kill_pid(cad_pid, sig, priv); 2239 return kill_pid(cad_pid, sig, priv);
2240 } 2240 }
2241 2241
2242 /* These can be the second arg to send_sig_info/send_group_sig_info. */ 2242 /* These can be the second arg to send_sig_info/send_group_sig_info. */
2243 #define SEND_SIG_NOINFO ((struct siginfo *) 0) 2243 #define SEND_SIG_NOINFO ((struct siginfo *) 0)
2244 #define SEND_SIG_PRIV ((struct siginfo *) 1) 2244 #define SEND_SIG_PRIV ((struct siginfo *) 1)
2245 #define SEND_SIG_FORCED ((struct siginfo *) 2) 2245 #define SEND_SIG_FORCED ((struct siginfo *) 2)
2246 2246
2247 /* 2247 /*
2248 * True if we are on the alternate signal stack. 2248 * True if we are on the alternate signal stack.
2249 */ 2249 */
2250 static inline int on_sig_stack(unsigned long sp) 2250 static inline int on_sig_stack(unsigned long sp)
2251 { 2251 {
2252 #ifdef CONFIG_STACK_GROWSUP 2252 #ifdef CONFIG_STACK_GROWSUP
2253 return sp >= current->sas_ss_sp && 2253 return sp >= current->sas_ss_sp &&
2254 sp - current->sas_ss_sp < current->sas_ss_size; 2254 sp - current->sas_ss_sp < current->sas_ss_size;
2255 #else 2255 #else
2256 return sp > current->sas_ss_sp && 2256 return sp > current->sas_ss_sp &&
2257 sp - current->sas_ss_sp <= current->sas_ss_size; 2257 sp - current->sas_ss_sp <= current->sas_ss_size;
2258 #endif 2258 #endif
2259 } 2259 }
2260 2260
2261 static inline int sas_ss_flags(unsigned long sp) 2261 static inline int sas_ss_flags(unsigned long sp)
2262 { 2262 {
2263 return (current->sas_ss_size == 0 ? SS_DISABLE 2263 return (current->sas_ss_size == 0 ? SS_DISABLE
2264 : on_sig_stack(sp) ? SS_ONSTACK : 0); 2264 : on_sig_stack(sp) ? SS_ONSTACK : 0);
2265 } 2265 }
2266 2266
2267 /* 2267 /*
2268 * Routines for handling mm_structs 2268 * Routines for handling mm_structs
2269 */ 2269 */
2270 extern struct mm_struct * mm_alloc(void); 2270 extern struct mm_struct * mm_alloc(void);
2271 2271
2272 /* mmdrop drops the mm and the page tables */ 2272 /* mmdrop drops the mm and the page tables */
2273 extern void __mmdrop(struct mm_struct *); 2273 extern void __mmdrop(struct mm_struct *);
2274 static inline void mmdrop(struct mm_struct * mm) 2274 static inline void mmdrop(struct mm_struct * mm)
2275 { 2275 {
2276 if (unlikely(atomic_dec_and_test(&mm->mm_count))) 2276 if (unlikely(atomic_dec_and_test(&mm->mm_count)))
2277 __mmdrop(mm); 2277 __mmdrop(mm);
2278 } 2278 }
2279 2279
2280 /* mmput gets rid of the mappings and all user-space */ 2280 /* mmput gets rid of the mappings and all user-space */
2281 extern void mmput(struct mm_struct *); 2281 extern void mmput(struct mm_struct *);
2282 /* Grab a reference to a task's mm, if it is not already going away */ 2282 /* Grab a reference to a task's mm, if it is not already going away */
2283 extern struct mm_struct *get_task_mm(struct task_struct *task); 2283 extern struct mm_struct *get_task_mm(struct task_struct *task);
2284 /* 2284 /*
2285 * Grab a reference to a task's mm, if it is not already going away 2285 * Grab a reference to a task's mm, if it is not already going away
2286 * and ptrace_may_access with the mode parameter passed to it 2286 * and ptrace_may_access with the mode parameter passed to it
2287 * succeeds. 2287 * succeeds.
2288 */ 2288 */
2289 extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); 2289 extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
2290 /* Remove the current task's stale references to the old mm_struct */ 2290 /* Remove the current task's stale references to the old mm_struct */
2291 extern void mm_release(struct task_struct *, struct mm_struct *); 2291 extern void mm_release(struct task_struct *, struct mm_struct *);
2292 /* Allocate a new mm structure and copy contents from tsk->mm */ 2292 /* Allocate a new mm structure and copy contents from tsk->mm */
2293 extern struct mm_struct *dup_mm(struct task_struct *tsk); 2293 extern struct mm_struct *dup_mm(struct task_struct *tsk);
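A short sketch of the get_task_mm()/mmput() pairing described above; the reporting helper is hypothetical, and kernel threads (which have no mm) are simply skipped.

static void example_report_mm(struct task_struct *task)	/* hypothetical */
{
	struct mm_struct *mm = get_task_mm(task);	/* NULL for kernel threads */

	if (!mm)
		return;

	pr_info("%s: %d VMAs\n", task->comm, mm->map_count);
	mmput(mm);					/* drop the reference we took */
}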
2294 2294
2295 extern int copy_thread(unsigned long, unsigned long, unsigned long, 2295 extern int copy_thread(unsigned long, unsigned long, unsigned long,
2296 struct task_struct *, struct pt_regs *); 2296 struct task_struct *, struct pt_regs *);
2297 extern void flush_thread(void); 2297 extern void flush_thread(void);
2298 extern void exit_thread(void); 2298 extern void exit_thread(void);
2299 2299
2300 extern void exit_files(struct task_struct *); 2300 extern void exit_files(struct task_struct *);
2301 extern void __cleanup_sighand(struct sighand_struct *); 2301 extern void __cleanup_sighand(struct sighand_struct *);
2302 2302
2303 extern void exit_itimers(struct signal_struct *); 2303 extern void exit_itimers(struct signal_struct *);
2304 extern void flush_itimer_signals(void); 2304 extern void flush_itimer_signals(void);
2305 2305
2306 extern void do_group_exit(int); 2306 extern void do_group_exit(int);
2307 2307
2308 extern void daemonize(const char *, ...); 2308 extern void daemonize(const char *, ...);
2309 extern int allow_signal(int); 2309 extern int allow_signal(int);
2310 extern int disallow_signal(int); 2310 extern int disallow_signal(int);
2311 2311
2312 extern int do_execve(const char *, 2312 extern int do_execve(const char *,
2313 const char __user * const __user *, 2313 const char __user * const __user *,
2314 const char __user * const __user *, struct pt_regs *); 2314 const char __user * const __user *, struct pt_regs *);
2315 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); 2315 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
2316 struct task_struct *fork_idle(int); 2316 struct task_struct *fork_idle(int);
2317 #ifdef CONFIG_GENERIC_KERNEL_THREAD 2317 #ifdef CONFIG_GENERIC_KERNEL_THREAD
2318 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); 2318 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
2319 #endif 2319 #endif
2320 2320
2321 extern void set_task_comm(struct task_struct *tsk, char *from); 2321 extern void set_task_comm(struct task_struct *tsk, char *from);
2322 extern char *get_task_comm(char *to, struct task_struct *tsk); 2322 extern char *get_task_comm(char *to, struct task_struct *tsk);
2323 2323
2324 #ifdef CONFIG_SMP 2324 #ifdef CONFIG_SMP
2325 void scheduler_ipi(void); 2325 void scheduler_ipi(void);
2326 extern unsigned long wait_task_inactive(struct task_struct *, long match_state); 2326 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
2327 #else 2327 #else
2328 static inline void scheduler_ipi(void) { } 2328 static inline void scheduler_ipi(void) { }
2329 static inline unsigned long wait_task_inactive(struct task_struct *p, 2329 static inline unsigned long wait_task_inactive(struct task_struct *p,
2330 long match_state) 2330 long match_state)
2331 { 2331 {
2332 return 1; 2332 return 1;
2333 } 2333 }
2334 #endif 2334 #endif
2335 2335
2336 #define next_task(p) \ 2336 #define next_task(p) \
2337 list_entry_rcu((p)->tasks.next, struct task_struct, tasks) 2337 list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
2338 2338
2339 #define for_each_process(p) \ 2339 #define for_each_process(p) \
2340 for (p = &init_task ; (p = next_task(p)) != &init_task ; ) 2340 for (p = &init_task ; (p = next_task(p)) != &init_task ; )
2341 2341
2342 extern bool current_is_single_threaded(void); 2342 extern bool current_is_single_threaded(void);
2343 2343
2344 /* 2344 /*
2345 * Careful: do_each_thread/while_each_thread is a double loop so 2345 * Careful: do_each_thread/while_each_thread is a double loop so
2346 * 'break' will not work as expected - use goto instead. 2346 * 'break' will not work as expected - use goto instead.
2347 */ 2347 */
2348 #define do_each_thread(g, t) \ 2348 #define do_each_thread(g, t) \
2349 for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do 2349 for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
2350 2350
2351 #define while_each_thread(g, t) \ 2351 #define while_each_thread(g, t) \
2352 while ((t = next_thread(t)) != g) 2352 while ((t = next_thread(t)) != g)
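A sketch of the goto-based early exit the comment above asks for. It assumes the caller already holds read_lock(&tasklist_lock) or rcu_read_lock(), and wanted_predicate() is a hypothetical test.

static struct task_struct *example_find_thread(void)
{
	struct task_struct *g, *t;

	do_each_thread(g, t) {
		if (wanted_predicate(t))	/* hypothetical test */
			goto found;	/* 'break' would only leave the inner loop */
	} while_each_thread(g, t);

	return NULL;
found:
	return t;
}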
2353 2353
2354 static inline int get_nr_threads(struct task_struct *tsk) 2354 static inline int get_nr_threads(struct task_struct *tsk)
2355 { 2355 {
2356 return tsk->signal->nr_threads; 2356 return tsk->signal->nr_threads;
2357 } 2357 }
2358 2358
2359 static inline bool thread_group_leader(struct task_struct *p) 2359 static inline bool thread_group_leader(struct task_struct *p)
2360 { 2360 {
2361 return p->exit_signal >= 0; 2361 return p->exit_signal >= 0;
2362 } 2362 }
2363 2363
2364 /* Due to the insanities of de_thread it is possible for a process 2364 /* Due to the insanities of de_thread it is possible for a process
2365 * to have the pid of the thread group leader without actually being 2365 * to have the pid of the thread group leader without actually being
2366 * the thread group leader. For iteration through the pids in proc 2366 * the thread group leader. For iteration through the pids in proc
2367 * all we care about is that we have a task with the appropriate 2367 * all we care about is that we have a task with the appropriate
2368 * pid; we don't actually care if we have the right task. 2368 * pid; we don't actually care if we have the right task.
2369 */ 2369 */
2370 static inline int has_group_leader_pid(struct task_struct *p) 2370 static inline int has_group_leader_pid(struct task_struct *p)
2371 { 2371 {
2372 return p->pid == p->tgid; 2372 return p->pid == p->tgid;
2373 } 2373 }
2374 2374
2375 static inline 2375 static inline
2376 int same_thread_group(struct task_struct *p1, struct task_struct *p2) 2376 int same_thread_group(struct task_struct *p1, struct task_struct *p2)
2377 { 2377 {
2378 return p1->tgid == p2->tgid; 2378 return p1->tgid == p2->tgid;
2379 } 2379 }
2380 2380
2381 static inline struct task_struct *next_thread(const struct task_struct *p) 2381 static inline struct task_struct *next_thread(const struct task_struct *p)
2382 { 2382 {
2383 return list_entry_rcu(p->thread_group.next, 2383 return list_entry_rcu(p->thread_group.next,
2384 struct task_struct, thread_group); 2384 struct task_struct, thread_group);
2385 } 2385 }
2386 2386
2387 static inline int thread_group_empty(struct task_struct *p) 2387 static inline int thread_group_empty(struct task_struct *p)
2388 { 2388 {
2389 return list_empty(&p->thread_group); 2389 return list_empty(&p->thread_group);
2390 } 2390 }
2391 2391
2392 #define delay_group_leader(p) \ 2392 #define delay_group_leader(p) \
2393 (thread_group_leader(p) && !thread_group_empty(p)) 2393 (thread_group_leader(p) && !thread_group_empty(p))
2394 2394
2395 /* 2395 /*
2396 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring 2396 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
2397 * subscriptions and synchronises with wait4(). Also used in procfs. Also 2397 * subscriptions and synchronises with wait4(). Also used in procfs. Also
2398 * pins the final release of task.io_context. Also protects ->cpuset and 2398 * pins the final release of task.io_context. Also protects ->cpuset and
2399 * ->cgroup.subsys[]. And ->vfork_done. 2399 * ->cgroup.subsys[]. And ->vfork_done.
2400 * 2400 *
2401 * Nests both inside and outside of read_lock(&tasklist_lock). 2401 * Nests both inside and outside of read_lock(&tasklist_lock).
2402 * It must not be nested with write_lock_irq(&tasklist_lock), 2402 * It must not be nested with write_lock_irq(&tasklist_lock),
2403 * neither inside nor outside. 2403 * neither inside nor outside.
2404 */ 2404 */
2405 static inline void task_lock(struct task_struct *p) 2405 static inline void task_lock(struct task_struct *p)
2406 { 2406 {
2407 spin_lock(&p->alloc_lock); 2407 spin_lock(&p->alloc_lock);
2408 } 2408 }
2409 2409
2410 static inline void task_unlock(struct task_struct *p) 2410 static inline void task_unlock(struct task_struct *p)
2411 { 2411 {
2412 spin_unlock(&p->alloc_lock); 2412 spin_unlock(&p->alloc_lock);
2413 } 2413 }
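For example, reading ->comm stably follows the pattern below; this is essentially what the get_task_comm() helper declared earlier does, and the buffer handling here is only a sketch.

static void example_copy_comm(struct task_struct *p, char buf[TASK_COMM_LEN])
{
	task_lock(p);		/* ->comm is covered by alloc_lock, see above */
	strncpy(buf, p->comm, TASK_COMM_LEN);
	task_unlock(p);
}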
2414 2414
2415 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, 2415 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
2416 unsigned long *flags); 2416 unsigned long *flags);
2417 2417
2418 static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, 2418 static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
2419 unsigned long *flags) 2419 unsigned long *flags)
2420 { 2420 {
2421 struct sighand_struct *ret; 2421 struct sighand_struct *ret;
2422 2422
2423 ret = __lock_task_sighand(tsk, flags); 2423 ret = __lock_task_sighand(tsk, flags);
2424 (void)__cond_lock(&tsk->sighand->siglock, ret); 2424 (void)__cond_lock(&tsk->sighand->siglock, ret);
2425 return ret; 2425 return ret;
2426 } 2426 }
2427 2427
2428 static inline void unlock_task_sighand(struct task_struct *tsk, 2428 static inline void unlock_task_sighand(struct task_struct *tsk,
2429 unsigned long *flags) 2429 unsigned long *flags)
2430 { 2430 {
2431 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); 2431 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
2432 } 2432 }
2433 2433
2434 #ifdef CONFIG_CGROUPS 2434 #ifdef CONFIG_CGROUPS
2435 static inline void threadgroup_change_begin(struct task_struct *tsk) 2435 static inline void threadgroup_change_begin(struct task_struct *tsk)
2436 { 2436 {
2437 down_read(&tsk->signal->group_rwsem); 2437 down_read(&tsk->signal->group_rwsem);
2438 } 2438 }
2439 static inline void threadgroup_change_end(struct task_struct *tsk) 2439 static inline void threadgroup_change_end(struct task_struct *tsk)
2440 { 2440 {
2441 up_read(&tsk->signal->group_rwsem); 2441 up_read(&tsk->signal->group_rwsem);
2442 } 2442 }
2443 2443
2444 /** 2444 /**
2445 * threadgroup_lock - lock threadgroup 2445 * threadgroup_lock - lock threadgroup
2446 * @tsk: member task of the threadgroup to lock 2446 * @tsk: member task of the threadgroup to lock
2447 * 2447 *
2448 * Lock the threadgroup @tsk belongs to. No new task is allowed to enter 2448 * Lock the threadgroup @tsk belongs to. No new task is allowed to enter
2449 * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or 2449 * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
2450 * perform exec. This is useful for cases where the threadgroup needs to 2450 * perform exec. This is useful for cases where the threadgroup needs to
2451 * stay stable across blockable operations. 2451 * stay stable across blockable operations.
2452 * 2452 *
2453 * fork and exit paths explicitly call threadgroup_change_{begin|end}() for 2453 * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
2454 * synchronization. While held, no new task will be added to threadgroup 2454 * synchronization. While held, no new task will be added to threadgroup
2455 * and no existing live task will have its PF_EXITING set. 2455 * and no existing live task will have its PF_EXITING set.
2456 * 2456 *
2457 * During exec, a task goes and puts its thread group through unusual 2457 * During exec, a task goes and puts its thread group through unusual
2458 * changes. After de-threading, exclusive access is assumed to resources 2458 * changes. After de-threading, exclusive access is assumed to resources
2459 * which are usually shared by tasks in the same group - e.g. sighand may 2459 * which are usually shared by tasks in the same group - e.g. sighand may
2460 * be replaced with a new one. Also, the exec'ing task takes over group 2460 * be replaced with a new one. Also, the exec'ing task takes over group
2461 * leader role including its pid. Exclude these changes while locked by 2461 * leader role including its pid. Exclude these changes while locked by
2462 * grabbing cred_guard_mutex which is used to synchronize exec path. 2462 * grabbing cred_guard_mutex which is used to synchronize exec path.
2463 */ 2463 */
2464 static inline void threadgroup_lock(struct task_struct *tsk) 2464 static inline void threadgroup_lock(struct task_struct *tsk)
2465 { 2465 {
2466 /* 2466 /*
2467 * exec uses exit for de-threading nesting group_rwsem inside 2467 * exec uses exit for de-threading nesting group_rwsem inside
2468 * cred_guard_mutex. Grab cred_guard_mutex first. 2468 * cred_guard_mutex. Grab cred_guard_mutex first.
2469 */ 2469 */
2470 mutex_lock(&tsk->signal->cred_guard_mutex); 2470 mutex_lock(&tsk->signal->cred_guard_mutex);
2471 down_write(&tsk->signal->group_rwsem); 2471 down_write(&tsk->signal->group_rwsem);
2472 } 2472 }
2473 2473
2474 /** 2474 /**
2475 * threadgroup_unlock - unlock threadgroup 2475 * threadgroup_unlock - unlock threadgroup
2476 * @tsk: member task of the threadgroup to unlock 2476 * @tsk: member task of the threadgroup to unlock
2477 * 2477 *
2478 * Reverse threadgroup_lock(). 2478 * Reverse threadgroup_lock().
2479 */ 2479 */
2480 static inline void threadgroup_unlock(struct task_struct *tsk) 2480 static inline void threadgroup_unlock(struct task_struct *tsk)
2481 { 2481 {
2482 up_write(&tsk->signal->group_rwsem); 2482 up_write(&tsk->signal->group_rwsem);
2483 mutex_unlock(&tsk->signal->cred_guard_mutex); 2483 mutex_unlock(&tsk->signal->cred_guard_mutex);
2484 } 2484 }
2485 #else 2485 #else
2486 static inline void threadgroup_change_begin(struct task_struct *tsk) {} 2486 static inline void threadgroup_change_begin(struct task_struct *tsk) {}
2487 static inline void threadgroup_change_end(struct task_struct *tsk) {} 2487 static inline void threadgroup_change_end(struct task_struct *tsk) {}
2488 static inline void threadgroup_lock(struct task_struct *tsk) {} 2488 static inline void threadgroup_lock(struct task_struct *tsk) {}
2489 static inline void threadgroup_unlock(struct task_struct *tsk) {} 2489 static inline void threadgroup_unlock(struct task_struct *tsk) {}
2490 #endif 2490 #endif
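In rough outline, a caller that needs the thread group to stay stable brackets the whole operation as below; this is a sketch only, and the work in the middle stands for whatever must see a stable group (e.g. migrating every thread as a unit).

	threadgroup_lock(tsk);
	/*
	 * Per the comment above: no new task enters tsk's group, no member
	 * sets PF_EXITING, and no exec happens while this is held.
	 */
	/* ... operate on the whole thread group ... */
	threadgroup_unlock(tsk);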
2491 2491
2492 #ifndef __HAVE_THREAD_FUNCTIONS 2492 #ifndef __HAVE_THREAD_FUNCTIONS
2493 2493
2494 #define task_thread_info(task) ((struct thread_info *)(task)->stack) 2494 #define task_thread_info(task) ((struct thread_info *)(task)->stack)
2495 #define task_stack_page(task) ((task)->stack) 2495 #define task_stack_page(task) ((task)->stack)
2496 2496
2497 static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) 2497 static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
2498 { 2498 {
2499 *task_thread_info(p) = *task_thread_info(org); 2499 *task_thread_info(p) = *task_thread_info(org);
2500 task_thread_info(p)->task = p; 2500 task_thread_info(p)->task = p;
2501 } 2501 }
2502 2502
2503 static inline unsigned long *end_of_stack(struct task_struct *p) 2503 static inline unsigned long *end_of_stack(struct task_struct *p)
2504 { 2504 {
2505 return (unsigned long *)(task_thread_info(p) + 1); 2505 return (unsigned long *)(task_thread_info(p) + 1);
2506 } 2506 }
2507 2507
2508 #endif 2508 #endif
2509 2509
2510 static inline int object_is_on_stack(void *obj) 2510 static inline int object_is_on_stack(void *obj)
2511 { 2511 {
2512 void *stack = task_stack_page(current); 2512 void *stack = task_stack_page(current);
2513 2513
2514 return (obj >= stack) && (obj < (stack + THREAD_SIZE)); 2514 return (obj >= stack) && (obj < (stack + THREAD_SIZE));
2515 } 2515 }
2516 2516
2517 extern void thread_info_cache_init(void); 2517 extern void thread_info_cache_init(void);
2518 2518
2519 #ifdef CONFIG_DEBUG_STACK_USAGE 2519 #ifdef CONFIG_DEBUG_STACK_USAGE
2520 static inline unsigned long stack_not_used(struct task_struct *p) 2520 static inline unsigned long stack_not_used(struct task_struct *p)
2521 { 2521 {
2522 unsigned long *n = end_of_stack(p); 2522 unsigned long *n = end_of_stack(p);
2523 2523
2524 do { /* Skip over canary */ 2524 do { /* Skip over canary */
2525 n++; 2525 n++;
2526 } while (!*n); 2526 } while (!*n);
2527 2527
2528 return (unsigned long)n - (unsigned long)end_of_stack(p); 2528 return (unsigned long)n - (unsigned long)end_of_stack(p);
2529 } 2529 }
2530 #endif 2530 #endif
2531 2531
2532 /* set thread flags in other task's structures 2532 /* set thread flags in other task's structures
2533 * - see asm/thread_info.h for TIF_xxxx flags available 2533 * - see asm/thread_info.h for TIF_xxxx flags available
2534 */ 2534 */
2535 static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) 2535 static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
2536 { 2536 {
2537 set_ti_thread_flag(task_thread_info(tsk), flag); 2537 set_ti_thread_flag(task_thread_info(tsk), flag);
2538 } 2538 }
2539 2539
2540 static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) 2540 static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
2541 { 2541 {
2542 clear_ti_thread_flag(task_thread_info(tsk), flag); 2542 clear_ti_thread_flag(task_thread_info(tsk), flag);
2543 } 2543 }
2544 2544
2545 static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) 2545 static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
2546 { 2546 {
2547 return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); 2547 return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
2548 } 2548 }
2549 2549
2550 static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) 2550 static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
2551 { 2551 {
2552 return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); 2552 return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
2553 } 2553 }
2554 2554
2555 static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) 2555 static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
2556 { 2556 {
2557 return test_ti_thread_flag(task_thread_info(tsk), flag); 2557 return test_ti_thread_flag(task_thread_info(tsk), flag);
2558 } 2558 }
2559 2559
2560 static inline void set_tsk_need_resched(struct task_struct *tsk) 2560 static inline void set_tsk_need_resched(struct task_struct *tsk)
2561 { 2561 {
2562 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); 2562 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2563 } 2563 }
2564 2564
2565 static inline void clear_tsk_need_resched(struct task_struct *tsk) 2565 static inline void clear_tsk_need_resched(struct task_struct *tsk)
2566 { 2566 {
2567 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); 2567 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2568 } 2568 }
2569 2569
2570 static inline int test_tsk_need_resched(struct task_struct *tsk) 2570 static inline int test_tsk_need_resched(struct task_struct *tsk)
2571 { 2571 {
2572 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); 2572 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2573 } 2573 }
2574 2574
2575 static inline int restart_syscall(void) 2575 static inline int restart_syscall(void)
2576 { 2576 {
2577 set_tsk_thread_flag(current, TIF_SIGPENDING); 2577 set_tsk_thread_flag(current, TIF_SIGPENDING);
2578 return -ERESTARTNOINTR; 2578 return -ERESTARTNOINTR;
2579 } 2579 }
2580 2580
2581 static inline int signal_pending(struct task_struct *p) 2581 static inline int signal_pending(struct task_struct *p)
2582 { 2582 {
2583 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); 2583 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
2584 } 2584 }
2585 2585
2586 static inline int __fatal_signal_pending(struct task_struct *p) 2586 static inline int __fatal_signal_pending(struct task_struct *p)
2587 { 2587 {
2588 return unlikely(sigismember(&p->pending.signal, SIGKILL)); 2588 return unlikely(sigismember(&p->pending.signal, SIGKILL));
2589 } 2589 }
2590 2590
2591 static inline int fatal_signal_pending(struct task_struct *p) 2591 static inline int fatal_signal_pending(struct task_struct *p)
2592 { 2592 {
2593 return signal_pending(p) && __fatal_signal_pending(p); 2593 return signal_pending(p) && __fatal_signal_pending(p);
2594 } 2594 }
2595 2595
2596 static inline int signal_pending_state(long state, struct task_struct *p) 2596 static inline int signal_pending_state(long state, struct task_struct *p)
2597 { 2597 {
2598 if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) 2598 if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
2599 return 0; 2599 return 0;
2600 if (!signal_pending(p)) 2600 if (!signal_pending(p))
2601 return 0; 2601 return 0;
2602 2602
2603 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); 2603 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
2604 } 2604 }
2605 2605
2606 static inline int need_resched(void) 2606 static inline int need_resched(void)
2607 { 2607 {
2608 return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 2608 return unlikely(test_thread_flag(TIF_NEED_RESCHED));
2609 } 2609 }
2610 2610
2611 /* 2611 /*
2612 * cond_resched() and cond_resched_lock(): latency reduction via 2612 * cond_resched() and cond_resched_lock(): latency reduction via
2613 * explicit rescheduling in places that are safe. The return 2613 * explicit rescheduling in places that are safe. The return
2614 * value indicates whether a reschedule was done in fact. 2614 * value indicates whether a reschedule was done in fact.
2615 * cond_resched_lock() will drop the spinlock before scheduling, 2615 * cond_resched_lock() will drop the spinlock before scheduling,
2616 * cond_resched_softirq() will enable bhs before scheduling. 2616 * cond_resched_softirq() will enable bhs before scheduling.
2617 */ 2617 */
2618 extern int _cond_resched(void); 2618 extern int _cond_resched(void);
2619 2619
2620 #define cond_resched() ({ \ 2620 #define cond_resched() ({ \
2621 __might_sleep(__FILE__, __LINE__, 0); \ 2621 __might_sleep(__FILE__, __LINE__, 0); \
2622 _cond_resched(); \ 2622 _cond_resched(); \
2623 }) 2623 })
2624 2624
2625 extern int __cond_resched_lock(spinlock_t *lock); 2625 extern int __cond_resched_lock(spinlock_t *lock);
2626 2626
2627 #ifdef CONFIG_PREEMPT_COUNT 2627 #ifdef CONFIG_PREEMPT_COUNT
2628 #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET 2628 #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
2629 #else 2629 #else
2630 #define PREEMPT_LOCK_OFFSET 0 2630 #define PREEMPT_LOCK_OFFSET 0
2631 #endif 2631 #endif
2632 2632
2633 #define cond_resched_lock(lock) ({ \ 2633 #define cond_resched_lock(lock) ({ \
2634 __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ 2634 __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
2635 __cond_resched_lock(lock); \ 2635 __cond_resched_lock(lock); \
2636 }) 2636 })
2637 2637
2638 extern int __cond_resched_softirq(void); 2638 extern int __cond_resched_softirq(void);
2639 2639
2640 #define cond_resched_softirq() ({ \ 2640 #define cond_resched_softirq() ({ \
2641 __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ 2641 __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
2642 __cond_resched_softirq(); \ 2642 __cond_resched_softirq(); \
2643 }) 2643 })
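A sketch of the typical use: a long loop running in process context drops in cond_resched() at a safe point so it does not hog the CPU on non-preemptible kernels. do_one_unit_of_work() is hypothetical.

static void example_process_all(int nr_items)
{
	int i;

	for (i = 0; i < nr_items; i++) {
		do_one_unit_of_work(i);	/* hypothetical per-item work */
		cond_resched();		/* safe point: may schedule here */
	}
}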
2644 2644
2645 /* 2645 /*
2646 * Does a critical section need to be broken due to another 2646 * Does a critical section need to be broken due to another
2647 * task waiting? (This technically does not depend on CONFIG_PREEMPT, 2647 * task waiting? (This technically does not depend on CONFIG_PREEMPT,
2648 * but reflects a general need for low latency.) 2648 * but reflects a general need for low latency.)
2649 */ 2649 */
2650 static inline int spin_needbreak(spinlock_t *lock) 2650 static inline int spin_needbreak(spinlock_t *lock)
2651 { 2651 {
2652 #ifdef CONFIG_PREEMPT 2652 #ifdef CONFIG_PREEMPT
2653 return spin_is_contended(lock); 2653 return spin_is_contended(lock);
2654 #else 2654 #else
2655 return 0; 2655 return 0;
2656 #endif 2656 #endif
2657 } 2657 }
2658 2658
2659 /* 2659 /*
2660 * Thread group CPU time accounting. 2660 * Thread group CPU time accounting.
2661 */ 2661 */
2662 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); 2662 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
2663 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); 2663 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
2664 2664
2665 static inline void thread_group_cputime_init(struct signal_struct *sig) 2665 static inline void thread_group_cputime_init(struct signal_struct *sig)
2666 { 2666 {
2667 raw_spin_lock_init(&sig->cputimer.lock); 2667 raw_spin_lock_init(&sig->cputimer.lock);
2668 } 2668 }
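A minimal sketch of a consumer of thread_group_cputime(), which fills a task_cputime with the summed raw times for the whole thread group; the printing helper is hypothetical.

static void example_print_group_times(struct task_struct *tsk)	/* hypothetical */
{
	struct task_cputime times;

	thread_group_cputime(tsk, &times);
	pr_info("utime=%llu stime=%llu sum_exec_runtime=%llu\n",
		(unsigned long long)times.utime,
		(unsigned long long)times.stime,
		(unsigned long long)times.sum_exec_runtime);
}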
2669 2669
2670 /* 2670 /*
2671 * Reevaluate whether the task has signals pending delivery. 2671 * Reevaluate whether the task has signals pending delivery.
2672 * Wake the task if so. 2672 * Wake the task if so.
2673 * This is required every time the blocked sigset_t changes. 2673 * This is required every time the blocked sigset_t changes.
2674 * callers must hold sighand->siglock. 2674 * callers must hold sighand->siglock.
2675 */ 2675 */
2676 extern void recalc_sigpending_and_wake(struct task_struct *t); 2676 extern void recalc_sigpending_and_wake(struct task_struct *t);
2677 extern void recalc_sigpending(void); 2677 extern void recalc_sigpending(void);
2678 2678
2679 extern void signal_wake_up(struct task_struct *t, int resume_stopped); 2679 extern void signal_wake_up(struct task_struct *t, int resume_stopped);
2680 2680
2681 /* 2681 /*
2682 * Wrappers for p->thread_info->cpu access. No-op on UP. 2682 * Wrappers for p->thread_info->cpu access. No-op on UP.
2683 */ 2683 */
2684 #ifdef CONFIG_SMP 2684 #ifdef CONFIG_SMP
2685 2685
2686 static inline unsigned int task_cpu(const struct task_struct *p) 2686 static inline unsigned int task_cpu(const struct task_struct *p)
2687 { 2687 {
2688 return task_thread_info(p)->cpu; 2688 return task_thread_info(p)->cpu;
2689 } 2689 }
2690 2690
2691 extern void set_task_cpu(struct task_struct *p, unsigned int cpu); 2691 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
2692 2692
2693 #else 2693 #else
2694 2694
2695 static inline unsigned int task_cpu(const struct task_struct *p) 2695 static inline unsigned int task_cpu(const struct task_struct *p)
2696 { 2696 {
2697 return 0; 2697 return 0;
2698 } 2698 }
2699 2699
2700 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) 2700 static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
2701 { 2701 {
2702 } 2702 }
2703 2703
2704 #endif /* CONFIG_SMP */ 2704 #endif /* CONFIG_SMP */
2705 2705
2706 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); 2706 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2707 extern long sched_getaffinity(pid_t pid, struct cpumask *mask); 2707 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2708 2708
2709 extern void normalize_rt_tasks(void); 2709 extern void normalize_rt_tasks(void);
2710 2710
2711 #ifdef CONFIG_CGROUP_SCHED 2711 #ifdef CONFIG_CGROUP_SCHED
2712 2712
2713 extern struct task_group root_task_group; 2713 extern struct task_group root_task_group;
2714 2714
2715 extern struct task_group *sched_create_group(struct task_group *parent); 2715 extern struct task_group *sched_create_group(struct task_group *parent);
2716 extern void sched_destroy_group(struct task_group *tg); 2716 extern void sched_destroy_group(struct task_group *tg);
2717 extern void sched_move_task(struct task_struct *tsk); 2717 extern void sched_move_task(struct task_struct *tsk);
2718 #ifdef CONFIG_FAIR_GROUP_SCHED 2718 #ifdef CONFIG_FAIR_GROUP_SCHED
2719 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); 2719 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
2720 extern unsigned long sched_group_shares(struct task_group *tg); 2720 extern unsigned long sched_group_shares(struct task_group *tg);
2721 #endif 2721 #endif
2722 #ifdef CONFIG_RT_GROUP_SCHED 2722 #ifdef CONFIG_RT_GROUP_SCHED
2723 extern int sched_group_set_rt_runtime(struct task_group *tg, 2723 extern int sched_group_set_rt_runtime(struct task_group *tg,
2724 long rt_runtime_us); 2724 long rt_runtime_us);
2725 extern long sched_group_rt_runtime(struct task_group *tg); 2725 extern long sched_group_rt_runtime(struct task_group *tg);
2726 extern int sched_group_set_rt_period(struct task_group *tg, 2726 extern int sched_group_set_rt_period(struct task_group *tg,
2727 long rt_period_us); 2727 long rt_period_us);
2728 extern long sched_group_rt_period(struct task_group *tg); 2728 extern long sched_group_rt_period(struct task_group *tg);
2729 extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); 2729 extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
2730 #endif 2730 #endif
2731 #endif /* CONFIG_CGROUP_SCHED */ 2731 #endif /* CONFIG_CGROUP_SCHED */
2732 2732
2733 extern int task_can_switch_user(struct user_struct *up, 2733 extern int task_can_switch_user(struct user_struct *up,
2734 struct task_struct *tsk); 2734 struct task_struct *tsk);
2735 2735
2736 #ifdef CONFIG_TASK_XACCT 2736 #ifdef CONFIG_TASK_XACCT
2737 static inline void add_rchar(struct task_struct *tsk, ssize_t amt) 2737 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
2738 { 2738 {
2739 tsk->ioac.rchar += amt; 2739 tsk->ioac.rchar += amt;
2740 } 2740 }
2741 2741
2742 static inline void add_wchar(struct task_struct *tsk, ssize_t amt) 2742 static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
2743 { 2743 {
2744 tsk->ioac.wchar += amt; 2744 tsk->ioac.wchar += amt;
2745 } 2745 }
2746 2746
2747 static inline void inc_syscr(struct task_struct *tsk) 2747 static inline void inc_syscr(struct task_struct *tsk)
2748 { 2748 {
2749 tsk->ioac.syscr++; 2749 tsk->ioac.syscr++;
2750 } 2750 }
2751 2751
2752 static inline void inc_syscw(struct task_struct *tsk) 2752 static inline void inc_syscw(struct task_struct *tsk)
2753 { 2753 {
2754 tsk->ioac.syscw++; 2754 tsk->ioac.syscw++;
2755 } 2755 }
2756 #else 2756 #else
2757 static inline void add_rchar(struct task_struct *tsk, ssize_t amt) 2757 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
2758 { 2758 {
2759 } 2759 }
2760 2760
2761 static inline void add_wchar(struct task_struct *tsk, ssize_t amt) 2761 static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
2762 { 2762 {
2763 } 2763 }
2764 2764
2765 static inline void inc_syscr(struct task_struct *tsk) 2765 static inline void inc_syscr(struct task_struct *tsk)
2766 { 2766 {
2767 } 2767 }
2768 2768
2769 static inline void inc_syscw(struct task_struct *tsk) 2769 static inline void inc_syscw(struct task_struct *tsk)
2770 { 2770 {
2771 } 2771 }
2772 #endif 2772 #endif
2773 2773
2774 #ifndef TASK_SIZE_OF 2774 #ifndef TASK_SIZE_OF
2775 #define TASK_SIZE_OF(tsk) TASK_SIZE 2775 #define TASK_SIZE_OF(tsk) TASK_SIZE
2776 #endif 2776 #endif
2777 2777
2778 #ifdef CONFIG_MM_OWNER 2778 #ifdef CONFIG_MM_OWNER
2779 extern void mm_update_next_owner(struct mm_struct *mm); 2779 extern void mm_update_next_owner(struct mm_struct *mm);
2780 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); 2780 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
2781 #else 2781 #else
2782 static inline void mm_update_next_owner(struct mm_struct *mm) 2782 static inline void mm_update_next_owner(struct mm_struct *mm)
2783 { 2783 {
2784 } 2784 }
2785 2785
2786 static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) 2786 static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
2787 { 2787 {
2788 } 2788 }
2789 #endif /* CONFIG_MM_OWNER */ 2789 #endif /* CONFIG_MM_OWNER */
2790 2790
2791 static inline unsigned long task_rlimit(const struct task_struct *tsk, 2791 static inline unsigned long task_rlimit(const struct task_struct *tsk,
2792 unsigned int limit) 2792 unsigned int limit)
2793 { 2793 {
2794 return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); 2794 return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
2795 } 2795 }
2796 2796
2797 static inline unsigned long task_rlimit_max(const struct task_struct *tsk, 2797 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
2798 unsigned int limit) 2798 unsigned int limit)
2799 { 2799 {
2800 return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); 2800 return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
2801 } 2801 }
2802 2802
2803 static inline unsigned long rlimit(unsigned int limit) 2803 static inline unsigned long rlimit(unsigned int limit)
2804 { 2804 {
2805 return task_rlimit(current, limit); 2805 return task_rlimit(current, limit);
2806 } 2806 }
2807 2807
2808 static inline unsigned long rlimit_max(unsigned int limit) 2808 static inline unsigned long rlimit_max(unsigned int limit)
2809 { 2809 {
2810 return task_rlimit_max(current, limit); 2810 return task_rlimit_max(current, limit);
2811 } 2811 }
2812 2812
2813 #endif 2813 #endif
2814 2814
1 /* 1 /*
2 * linux/kernel/exit.c 2 * linux/kernel/exit.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 #include <linux/mm.h> 7 #include <linux/mm.h>
8 #include <linux/slab.h> 8 #include <linux/slab.h>
9 #include <linux/interrupt.h> 9 #include <linux/interrupt.h>
10 #include <linux/module.h> 10 #include <linux/module.h>
11 #include <linux/capability.h> 11 #include <linux/capability.h>
12 #include <linux/completion.h> 12 #include <linux/completion.h>
13 #include <linux/personality.h> 13 #include <linux/personality.h>
14 #include <linux/tty.h> 14 #include <linux/tty.h>
15 #include <linux/iocontext.h> 15 #include <linux/iocontext.h>
16 #include <linux/key.h> 16 #include <linux/key.h>
17 #include <linux/security.h> 17 #include <linux/security.h>
18 #include <linux/cpu.h> 18 #include <linux/cpu.h>
19 #include <linux/acct.h> 19 #include <linux/acct.h>
20 #include <linux/tsacct_kern.h> 20 #include <linux/tsacct_kern.h>
21 #include <linux/file.h> 21 #include <linux/file.h>
22 #include <linux/fdtable.h> 22 #include <linux/fdtable.h>
23 #include <linux/binfmts.h> 23 #include <linux/binfmts.h>
24 #include <linux/nsproxy.h> 24 #include <linux/nsproxy.h>
25 #include <linux/pid_namespace.h> 25 #include <linux/pid_namespace.h>
26 #include <linux/ptrace.h> 26 #include <linux/ptrace.h>
27 #include <linux/profile.h> 27 #include <linux/profile.h>
28 #include <linux/mount.h> 28 #include <linux/mount.h>
29 #include <linux/proc_fs.h> 29 #include <linux/proc_fs.h>
30 #include <linux/kthread.h> 30 #include <linux/kthread.h>
31 #include <linux/mempolicy.h> 31 #include <linux/mempolicy.h>
32 #include <linux/taskstats_kern.h> 32 #include <linux/taskstats_kern.h>
33 #include <linux/delayacct.h> 33 #include <linux/delayacct.h>
34 #include <linux/freezer.h> 34 #include <linux/freezer.h>
35 #include <linux/cgroup.h> 35 #include <linux/cgroup.h>
36 #include <linux/syscalls.h> 36 #include <linux/syscalls.h>
37 #include <linux/signal.h> 37 #include <linux/signal.h>
38 #include <linux/posix-timers.h> 38 #include <linux/posix-timers.h>
39 #include <linux/cn_proc.h> 39 #include <linux/cn_proc.h>
40 #include <linux/mutex.h> 40 #include <linux/mutex.h>
41 #include <linux/futex.h> 41 #include <linux/futex.h>
42 #include <linux/pipe_fs_i.h> 42 #include <linux/pipe_fs_i.h>
43 #include <linux/audit.h> /* for audit_free() */ 43 #include <linux/audit.h> /* for audit_free() */
44 #include <linux/resource.h> 44 #include <linux/resource.h>
45 #include <linux/blkdev.h> 45 #include <linux/blkdev.h>
46 #include <linux/task_io_accounting_ops.h> 46 #include <linux/task_io_accounting_ops.h>
47 #include <linux/tracehook.h> 47 #include <linux/tracehook.h>
48 #include <linux/fs_struct.h> 48 #include <linux/fs_struct.h>
49 #include <linux/init_task.h> 49 #include <linux/init_task.h>
50 #include <linux/perf_event.h> 50 #include <linux/perf_event.h>
51 #include <trace/events/sched.h> 51 #include <trace/events/sched.h>
52 #include <linux/hw_breakpoint.h> 52 #include <linux/hw_breakpoint.h>
53 #include <linux/oom.h> 53 #include <linux/oom.h>
54 #include <linux/writeback.h> 54 #include <linux/writeback.h>
55 #include <linux/shm.h> 55 #include <linux/shm.h>
56 56
57 #include <asm/uaccess.h> 57 #include <asm/uaccess.h>
58 #include <asm/unistd.h> 58 #include <asm/unistd.h>
59 #include <asm/pgtable.h> 59 #include <asm/pgtable.h>
60 #include <asm/mmu_context.h> 60 #include <asm/mmu_context.h>
61 61
62 static void exit_mm(struct task_struct * tsk); 62 static void exit_mm(struct task_struct * tsk);
63 63
64 static void __unhash_process(struct task_struct *p, bool group_dead) 64 static void __unhash_process(struct task_struct *p, bool group_dead)
65 { 65 {
66 nr_threads--; 66 nr_threads--;
67 detach_pid(p, PIDTYPE_PID); 67 detach_pid(p, PIDTYPE_PID);
68 if (group_dead) { 68 if (group_dead) {
69 detach_pid(p, PIDTYPE_PGID); 69 detach_pid(p, PIDTYPE_PGID);
70 detach_pid(p, PIDTYPE_SID); 70 detach_pid(p, PIDTYPE_SID);
71 71
72 list_del_rcu(&p->tasks); 72 list_del_rcu(&p->tasks);
73 list_del_init(&p->sibling); 73 list_del_init(&p->sibling);
74 __this_cpu_dec(process_counts); 74 __this_cpu_dec(process_counts);
75 /* 75 /*
76 * If we are the last child process in a pid namespace to be 76 * If we are the last child process in a pid namespace to be
77 * reaped, notify the reaper sleeping in zap_pid_ns_processes(). 77 * reaped, notify the reaper sleeping in zap_pid_ns_processes().
78 */ 78 */
79 if (IS_ENABLED(CONFIG_PID_NS)) { 79 if (IS_ENABLED(CONFIG_PID_NS)) {
80 struct task_struct *parent = p->real_parent; 80 struct task_struct *parent = p->real_parent;
81 81
82 if ((task_active_pid_ns(parent)->child_reaper == parent) && 82 if ((task_active_pid_ns(parent)->child_reaper == parent) &&
83 list_empty(&parent->children) && 83 list_empty(&parent->children) &&
84 (parent->flags & PF_EXITING)) 84 (parent->flags & PF_EXITING))
85 wake_up_process(parent); 85 wake_up_process(parent);
86 } 86 }
87 } 87 }
88 list_del_rcu(&p->thread_group); 88 list_del_rcu(&p->thread_group);
89 } 89 }
90 90
91 /* 91 /*
92 * This function expects the tasklist_lock write-locked. 92 * This function expects the tasklist_lock write-locked.
93 */ 93 */
94 static void __exit_signal(struct task_struct *tsk) 94 static void __exit_signal(struct task_struct *tsk)
95 { 95 {
96 struct signal_struct *sig = tsk->signal; 96 struct signal_struct *sig = tsk->signal;
97 bool group_dead = thread_group_leader(tsk); 97 bool group_dead = thread_group_leader(tsk);
98 struct sighand_struct *sighand; 98 struct sighand_struct *sighand;
99 struct tty_struct *uninitialized_var(tty); 99 struct tty_struct *uninitialized_var(tty);
100 100
101 sighand = rcu_dereference_check(tsk->sighand, 101 sighand = rcu_dereference_check(tsk->sighand,
102 lockdep_tasklist_lock_is_held()); 102 lockdep_tasklist_lock_is_held());
103 spin_lock(&sighand->siglock); 103 spin_lock(&sighand->siglock);
104 104
105 posix_cpu_timers_exit(tsk); 105 posix_cpu_timers_exit(tsk);
106 if (group_dead) { 106 if (group_dead) {
107 posix_cpu_timers_exit_group(tsk); 107 posix_cpu_timers_exit_group(tsk);
108 tty = sig->tty; 108 tty = sig->tty;
109 sig->tty = NULL; 109 sig->tty = NULL;
110 } else { 110 } else {
111 /* 111 /*
112 * This can only happen if the caller is de_thread(). 112 * This can only happen if the caller is de_thread().
113 * FIXME: this is a temporary hack; we should teach 113 * FIXME: this is a temporary hack; we should teach
114 * posix-cpu-timers to handle this case correctly. 114 * posix-cpu-timers to handle this case correctly.
115 */ 115 */
116 if (unlikely(has_group_leader_pid(tsk))) 116 if (unlikely(has_group_leader_pid(tsk)))
117 posix_cpu_timers_exit_group(tsk); 117 posix_cpu_timers_exit_group(tsk);
118 118
119 /* 119 /*
120 * If there is any task waiting for the group exit 120 * If there is any task waiting for the group exit
121 * then notify it: 121 * then notify it:
122 */ 122 */
123 if (sig->notify_count > 0 && !--sig->notify_count) 123 if (sig->notify_count > 0 && !--sig->notify_count)
124 wake_up_process(sig->group_exit_task); 124 wake_up_process(sig->group_exit_task);
125 125
126 if (tsk == sig->curr_target) 126 if (tsk == sig->curr_target)
127 sig->curr_target = next_thread(tsk); 127 sig->curr_target = next_thread(tsk);
128 /* 128 /*
129 * Accumulate here the counters for all threads but the 129 * Accumulate here the counters for all threads but the
130 * group leader as they die, so they can be added into 130 * group leader as they die, so they can be added into
131 * the process-wide totals when those are taken. 131 * the process-wide totals when those are taken.
132 * The group leader stays around as a zombie as long 132 * The group leader stays around as a zombie as long
133 * as there are other threads. When it gets reaped, 133 * as there are other threads. When it gets reaped,
134 * the exit.c code will add its counts into these totals. 134 * the exit.c code will add its counts into these totals.
135 * We won't ever get here for the group leader, since it 135 * We won't ever get here for the group leader, since it
136 * will have been the last reference on the signal_struct. 136 * will have been the last reference on the signal_struct.
137 */ 137 */
138 sig->utime += tsk->utime; 138 sig->utime += tsk->utime;
139 sig->stime += tsk->stime; 139 sig->stime += tsk->stime;
140 sig->gtime += tsk->gtime; 140 sig->gtime += tsk->gtime;
141 sig->min_flt += tsk->min_flt; 141 sig->min_flt += tsk->min_flt;
142 sig->maj_flt += tsk->maj_flt; 142 sig->maj_flt += tsk->maj_flt;
143 sig->nvcsw += tsk->nvcsw; 143 sig->nvcsw += tsk->nvcsw;
144 sig->nivcsw += tsk->nivcsw; 144 sig->nivcsw += tsk->nivcsw;
145 sig->inblock += task_io_get_inblock(tsk); 145 sig->inblock += task_io_get_inblock(tsk);
146 sig->oublock += task_io_get_oublock(tsk); 146 sig->oublock += task_io_get_oublock(tsk);
147 task_io_accounting_add(&sig->ioac, &tsk->ioac); 147 task_io_accounting_add(&sig->ioac, &tsk->ioac);
148 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 148 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
149 } 149 }
150 150
151 sig->nr_threads--; 151 sig->nr_threads--;
152 __unhash_process(tsk, group_dead); 152 __unhash_process(tsk, group_dead);
153 153
154 /* 154 /*
155 * Do this under ->siglock, we can race with another thread 155 * Do this under ->siglock, we can race with another thread
156 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 156 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
157 */ 157 */
158 flush_sigqueue(&tsk->pending); 158 flush_sigqueue(&tsk->pending);
159 tsk->sighand = NULL; 159 tsk->sighand = NULL;
160 spin_unlock(&sighand->siglock); 160 spin_unlock(&sighand->siglock);
161 161
162 __cleanup_sighand(sighand); 162 __cleanup_sighand(sighand);
163 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 163 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
164 if (group_dead) { 164 if (group_dead) {
165 flush_sigqueue(&sig->shared_pending); 165 flush_sigqueue(&sig->shared_pending);
166 tty_kref_put(tty); 166 tty_kref_put(tty);
167 } 167 }
168 } 168 }
169 169
170 static void delayed_put_task_struct(struct rcu_head *rhp) 170 static void delayed_put_task_struct(struct rcu_head *rhp)
171 { 171 {
172 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 172 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
173 173
174 perf_event_delayed_put(tsk); 174 perf_event_delayed_put(tsk);
175 trace_sched_process_free(tsk); 175 trace_sched_process_free(tsk);
176 put_task_struct(tsk); 176 put_task_struct(tsk);
177 } 177 }
178 178
179 179
180 void release_task(struct task_struct * p) 180 void release_task(struct task_struct * p)
181 { 181 {
182 struct task_struct *leader; 182 struct task_struct *leader;
183 int zap_leader; 183 int zap_leader;
184 repeat: 184 repeat:
185 /* don't need to get the RCU readlock here - the process is dead and 185 /* don't need to get the RCU readlock here - the process is dead and
186 * can't be modifying its own credentials. But shut RCU-lockdep up */ 186 * can't be modifying its own credentials. But shut RCU-lockdep up */
187 rcu_read_lock(); 187 rcu_read_lock();
188 atomic_dec(&__task_cred(p)->user->processes); 188 atomic_dec(&__task_cred(p)->user->processes);
189 rcu_read_unlock(); 189 rcu_read_unlock();
190 190
191 proc_flush_task(p); 191 proc_flush_task(p);
192 192
193 write_lock_irq(&tasklist_lock); 193 write_lock_irq(&tasklist_lock);
194 ptrace_release_task(p); 194 ptrace_release_task(p);
195 __exit_signal(p); 195 __exit_signal(p);
196 196
197 /* 197 /*
198 * If we are the last non-leader member of the thread 198 * If we are the last non-leader member of the thread
199 * group, and the leader is a zombie, then notify the 199 * group, and the leader is a zombie, then notify the
200 * group leader's parent process (if it wants notification). 200 * group leader's parent process (if it wants notification).
201 */ 201 */
202 zap_leader = 0; 202 zap_leader = 0;
203 leader = p->group_leader; 203 leader = p->group_leader;
204 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { 204 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
205 /* 205 /*
206 * If we were the last child thread and the leader has 206 * If we were the last child thread and the leader has
207 * exited already, and the leader's parent ignores SIGCHLD, 207 * exited already, and the leader's parent ignores SIGCHLD,
208 * then we are the one who should release the leader. 208 * then we are the one who should release the leader.
209 */ 209 */
210 zap_leader = do_notify_parent(leader, leader->exit_signal); 210 zap_leader = do_notify_parent(leader, leader->exit_signal);
211 if (zap_leader) 211 if (zap_leader)
212 leader->exit_state = EXIT_DEAD; 212 leader->exit_state = EXIT_DEAD;
213 } 213 }
214 214
215 write_unlock_irq(&tasklist_lock); 215 write_unlock_irq(&tasklist_lock);
216 release_thread(p); 216 release_thread(p);
217 call_rcu(&p->rcu, delayed_put_task_struct); 217 call_rcu(&p->rcu, delayed_put_task_struct);
218 218
219 p = leader; 219 p = leader;
220 if (unlikely(zap_leader)) 220 if (unlikely(zap_leader))
221 goto repeat; 221 goto repeat;
222 } 222 }
223 223
224 /* 224 /*
225 * This checks not only the pgrp, but falls back on the pid if no 225 * This checks not only the pgrp, but falls back on the pid if no
226 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly 226 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
227 * without this... 227 * without this...
228 * 228 *
229 * The caller must hold rcu lock or the tasklist lock. 229 * The caller must hold rcu lock or the tasklist lock.
230 */ 230 */
231 struct pid *session_of_pgrp(struct pid *pgrp) 231 struct pid *session_of_pgrp(struct pid *pgrp)
232 { 232 {
233 struct task_struct *p; 233 struct task_struct *p;
234 struct pid *sid = NULL; 234 struct pid *sid = NULL;
235 235
236 p = pid_task(pgrp, PIDTYPE_PGID); 236 p = pid_task(pgrp, PIDTYPE_PGID);
237 if (p == NULL) 237 if (p == NULL)
238 p = pid_task(pgrp, PIDTYPE_PID); 238 p = pid_task(pgrp, PIDTYPE_PID);
239 if (p != NULL) 239 if (p != NULL)
240 sid = task_session(p); 240 sid = task_session(p);
241 241
242 return sid; 242 return sid;
243 } 243 }
244 244
245 /* 245 /*
246 * Determine if a process group is "orphaned", according to the POSIX 246 * Determine if a process group is "orphaned", according to the POSIX
247 * definition in 2.2.2.52. Orphaned process groups are not to be affected 247 * definition in 2.2.2.52. Orphaned process groups are not to be affected
248 * by terminal-generated stop signals. Newly orphaned process groups are 248 * by terminal-generated stop signals. Newly orphaned process groups are
249 * to receive a SIGHUP and a SIGCONT. 249 * to receive a SIGHUP and a SIGCONT.
250 * 250 *
251 * "I ask you, have you ever known what it is to be an orphan?" 251 * "I ask you, have you ever known what it is to be an orphan?"
252 */ 252 */
253 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) 253 static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
254 { 254 {
255 struct task_struct *p; 255 struct task_struct *p;
256 256
257 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 257 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
258 if ((p == ignored_task) || 258 if ((p == ignored_task) ||
259 (p->exit_state && thread_group_empty(p)) || 259 (p->exit_state && thread_group_empty(p)) ||
260 is_global_init(p->real_parent)) 260 is_global_init(p->real_parent))
261 continue; 261 continue;
262 262
263 if (task_pgrp(p->real_parent) != pgrp && 263 if (task_pgrp(p->real_parent) != pgrp &&
264 task_session(p->real_parent) == task_session(p)) 264 task_session(p->real_parent) == task_session(p))
265 return 0; 265 return 0;
266 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 266 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
267 267
268 return 1; 268 return 1;
269 } 269 }
270 270
271 int is_current_pgrp_orphaned(void) 271 int is_current_pgrp_orphaned(void)
272 { 272 {
273 int retval; 273 int retval;
274 274
275 read_lock(&tasklist_lock); 275 read_lock(&tasklist_lock);
276 retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); 276 retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
277 read_unlock(&tasklist_lock); 277 read_unlock(&tasklist_lock);
278 278
279 return retval; 279 return retval;
280 } 280 }
281 281
282 static bool has_stopped_jobs(struct pid *pgrp) 282 static bool has_stopped_jobs(struct pid *pgrp)
283 { 283 {
284 struct task_struct *p; 284 struct task_struct *p;
285 285
286 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 286 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
287 if (p->signal->flags & SIGNAL_STOP_STOPPED) 287 if (p->signal->flags & SIGNAL_STOP_STOPPED)
288 return true; 288 return true;
289 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 289 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
290 290
291 return false; 291 return false;
292 } 292 }
293 293
294 /* 294 /*
295 * Check to see if any process groups have become orphaned as 295 * Check to see if any process groups have become orphaned as
296 * a result of our exiting, and if they have any stopped jobs, 296 * a result of our exiting, and if they have any stopped jobs,
297 * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 297 * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
298 */ 298 */
299 static void 299 static void
300 kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) 300 kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
301 { 301 {
302 struct pid *pgrp = task_pgrp(tsk); 302 struct pid *pgrp = task_pgrp(tsk);
303 struct task_struct *ignored_task = tsk; 303 struct task_struct *ignored_task = tsk;
304 304
305 if (!parent) 305 if (!parent)
306 /* exit: our father is in a different pgrp than 306 /* exit: our father is in a different pgrp than
307 * we are and we were the only connection outside. 307 * we are and we were the only connection outside.
308 */ 308 */
309 parent = tsk->real_parent; 309 parent = tsk->real_parent;
310 else 310 else
311 /* reparent: our child is in a different pgrp than 311 /* reparent: our child is in a different pgrp than
312 * we are, and it was the only connection outside. 312 * we are, and it was the only connection outside.
313 */ 313 */
314 ignored_task = NULL; 314 ignored_task = NULL;
315 315
316 if (task_pgrp(parent) != pgrp && 316 if (task_pgrp(parent) != pgrp &&
317 task_session(parent) == task_session(tsk) && 317 task_session(parent) == task_session(tsk) &&
318 will_become_orphaned_pgrp(pgrp, ignored_task) && 318 will_become_orphaned_pgrp(pgrp, ignored_task) &&
319 has_stopped_jobs(pgrp)) { 319 has_stopped_jobs(pgrp)) {
320 __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); 320 __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
321 __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); 321 __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
322 } 322 }
323 } 323 }
324 324
325 /** 325 /**
326 * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd 326 * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
327 * 327 *
328 * If a kernel thread is launched as a result of a system call, or if 328 * If a kernel thread is launched as a result of a system call, or if
329 * it ever exits, it should generally reparent itself to kthreadd so it 329 * it ever exits, it should generally reparent itself to kthreadd so it
330 * isn't in the way of other processes and is correctly cleaned up on exit. 330 * isn't in the way of other processes and is correctly cleaned up on exit.
331 * 331 *
332 * The various task state such as scheduling policy and priority may have 332 * The various task state such as scheduling policy and priority may have
333 * been inherited from a user process, so we reset them to sane values here. 333 * been inherited from a user process, so we reset them to sane values here.
334 * 334 *
335 * NOTE that reparent_to_kthreadd() gives the caller full capabilities. 335 * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
336 */ 336 */
337 static void reparent_to_kthreadd(void) 337 static void reparent_to_kthreadd(void)
338 { 338 {
339 write_lock_irq(&tasklist_lock); 339 write_lock_irq(&tasklist_lock);
340 340
341 ptrace_unlink(current); 341 ptrace_unlink(current);
342 /* Reparent to init */ 342 /* Reparent to init */
343 current->real_parent = current->parent = kthreadd_task; 343 current->real_parent = current->parent = kthreadd_task;
344 list_move_tail(&current->sibling, &current->real_parent->children); 344 list_move_tail(&current->sibling, &current->real_parent->children);
345 345
346 /* Set the exit signal to SIGCHLD so we signal init on exit */ 346 /* Set the exit signal to SIGCHLD so we signal init on exit */
347 current->exit_signal = SIGCHLD; 347 current->exit_signal = SIGCHLD;
348 348
349 if (task_nice(current) < 0) 349 if (task_nice(current) < 0)
350 set_user_nice(current, 0); 350 set_user_nice(current, 0);
351 /* cpus_allowed? */ 351 /* cpus_allowed? */
352 /* rt_priority? */ 352 /* rt_priority? */
353 /* signals? */ 353 /* signals? */
354 memcpy(current->signal->rlim, init_task.signal->rlim, 354 memcpy(current->signal->rlim, init_task.signal->rlim,
355 sizeof(current->signal->rlim)); 355 sizeof(current->signal->rlim));
356 356
357 atomic_inc(&init_cred.usage); 357 atomic_inc(&init_cred.usage);
358 commit_creds(&init_cred); 358 commit_creds(&init_cred);
359 write_unlock_irq(&tasklist_lock); 359 write_unlock_irq(&tasklist_lock);
360 } 360 }
361 361
362 void __set_special_pids(struct pid *pid) 362 void __set_special_pids(struct pid *pid)
363 { 363 {
364 struct task_struct *curr = current->group_leader; 364 struct task_struct *curr = current->group_leader;
365 365
366 if (task_session(curr) != pid) 366 if (task_session(curr) != pid)
367 change_pid(curr, PIDTYPE_SID, pid); 367 change_pid(curr, PIDTYPE_SID, pid);
368 368
369 if (task_pgrp(curr) != pid) 369 if (task_pgrp(curr) != pid)
370 change_pid(curr, PIDTYPE_PGID, pid); 370 change_pid(curr, PIDTYPE_PGID, pid);
371 } 371 }
372 372
373 static void set_special_pids(struct pid *pid) 373 static void set_special_pids(struct pid *pid)
374 { 374 {
375 write_lock_irq(&tasklist_lock); 375 write_lock_irq(&tasklist_lock);
376 __set_special_pids(pid); 376 __set_special_pids(pid);
377 write_unlock_irq(&tasklist_lock); 377 write_unlock_irq(&tasklist_lock);
378 } 378 }
379 379
380 /* 380 /*
381 * Let kernel threads use this to say that they allow a certain signal. 381 * Let kernel threads use this to say that they allow a certain signal.
382 * Must not be used if kthread was cloned with CLONE_SIGHAND. 382 * Must not be used if kthread was cloned with CLONE_SIGHAND.
383 */ 383 */
384 int allow_signal(int sig) 384 int allow_signal(int sig)
385 { 385 {
386 if (!valid_signal(sig) || sig < 1) 386 if (!valid_signal(sig) || sig < 1)
387 return -EINVAL; 387 return -EINVAL;
388 388
389 spin_lock_irq(&current->sighand->siglock); 389 spin_lock_irq(&current->sighand->siglock);
390 /* This is only needed for daemonize()'ed kthreads */ 390 /* This is only needed for daemonize()'ed kthreads */
391 sigdelset(&current->blocked, sig); 391 sigdelset(&current->blocked, sig);
392 /* 392 /*
393 * Kernel threads handle their own signals. Let the signal code 393 * Kernel threads handle their own signals. Let the signal code
394 * know it'll be handled, so that they don't get converted to 394 * know it'll be handled, so that they don't get converted to
395 * SIGKILL or just silently dropped. 395 * SIGKILL or just silently dropped.
396 */ 396 */
397 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; 397 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
398 recalc_sigpending(); 398 recalc_sigpending();
399 spin_unlock_irq(&current->sighand->siglock); 399 spin_unlock_irq(&current->sighand->siglock);
400 return 0; 400 return 0;
401 } 401 }
402 402
403 EXPORT_SYMBOL(allow_signal); 403 EXPORT_SYMBOL(allow_signal);
404 404
405 int disallow_signal(int sig) 405 int disallow_signal(int sig)
406 { 406 {
407 if (!valid_signal(sig) || sig < 1) 407 if (!valid_signal(sig) || sig < 1)
408 return -EINVAL; 408 return -EINVAL;
409 409
410 spin_lock_irq(&current->sighand->siglock); 410 spin_lock_irq(&current->sighand->siglock);
411 current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN; 411 current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
412 recalc_sigpending(); 412 recalc_sigpending();
413 spin_unlock_irq(&current->sighand->siglock); 413 spin_unlock_irq(&current->sighand->siglock);
414 return 0; 414 return 0;
415 } 415 }
416 416
417 EXPORT_SYMBOL(disallow_signal); 417 EXPORT_SYMBOL(disallow_signal);
418 418
419 /* 419 /*
420 * Put all the gunge required to become a kernel thread without 420 * Put all the gunge required to become a kernel thread without
421 * attached user resources in one place where it belongs. 421 * attached user resources in one place where it belongs.
422 */ 422 */
423 423
424 void daemonize(const char *name, ...) 424 void daemonize(const char *name, ...)
425 { 425 {
426 va_list args; 426 va_list args;
427 sigset_t blocked; 427 sigset_t blocked;
428 428
429 va_start(args, name); 429 va_start(args, name);
430 vsnprintf(current->comm, sizeof(current->comm), name, args); 430 vsnprintf(current->comm, sizeof(current->comm), name, args);
431 va_end(args); 431 va_end(args);
432 432
433 /* 433 /*
434 * If we were started as a result of loading a module, close all of the 434 * If we were started as a result of loading a module, close all of the
435 * user space pages. We don't need them, and if we didn't close them 435 * user space pages. We don't need them, and if we didn't close them
436 * they would be locked into memory. 436 * they would be locked into memory.
437 */ 437 */
438 exit_mm(current); 438 exit_mm(current);
439 /* 439 /*
440 * We don't want to get frozen, in case system-wide hibernation 440 * We don't want to get frozen, in case system-wide hibernation
441 * or suspend transition begins right now. 441 * or suspend transition begins right now.
442 */ 442 */
443 current->flags |= (PF_NOFREEZE | PF_KTHREAD); 443 current->flags |= (PF_NOFREEZE | PF_KTHREAD);
444 444
445 if (current->nsproxy != &init_nsproxy) { 445 if (current->nsproxy != &init_nsproxy) {
446 get_nsproxy(&init_nsproxy); 446 get_nsproxy(&init_nsproxy);
447 switch_task_namespaces(current, &init_nsproxy); 447 switch_task_namespaces(current, &init_nsproxy);
448 } 448 }
449 set_special_pids(&init_struct_pid); 449 set_special_pids(&init_struct_pid);
450 proc_clear_tty(current); 450 proc_clear_tty(current);
451 451
452 /* Block and flush all signals */ 452 /* Block and flush all signals */
453 sigfillset(&blocked); 453 sigfillset(&blocked);
454 sigprocmask(SIG_BLOCK, &blocked, NULL); 454 sigprocmask(SIG_BLOCK, &blocked, NULL);
455 flush_signals(current); 455 flush_signals(current);
456 456
457 /* Become as one with the init task */ 457 /* Become as one with the init task */
458 458
459 daemonize_fs_struct(); 459 daemonize_fs_struct();
460 daemonize_descriptors(); 460 daemonize_descriptors();
461 461
462 reparent_to_kthreadd(); 462 reparent_to_kthreadd();
463 } 463 }
464 464
465 EXPORT_SYMBOL(daemonize); 465 EXPORT_SYMBOL(daemonize);
466 466
467 #ifdef CONFIG_MM_OWNER 467 #ifdef CONFIG_MM_OWNER
468 /* 468 /*
469 * A task is exiting. If it owned this mm, find a new owner for the mm. 469 * A task is exiting. If it owned this mm, find a new owner for the mm.
470 */ 470 */
471 void mm_update_next_owner(struct mm_struct *mm) 471 void mm_update_next_owner(struct mm_struct *mm)
472 { 472 {
473 struct task_struct *c, *g, *p = current; 473 struct task_struct *c, *g, *p = current;
474 474
475 retry: 475 retry:
476 /* 476 /*
477 * If the exiting or execing task is not the owner, it's 477 * If the exiting or execing task is not the owner, it's
478 * someone else's problem. 478 * someone else's problem.
479 */ 479 */
480 if (mm->owner != p) 480 if (mm->owner != p)
481 return; 481 return;
482 /* 482 /*
483 * The current owner is exiting/execing and there are no other 483 * The current owner is exiting/execing and there are no other
484 * candidates. Do not leave the mm pointing to a possibly 484 * candidates. Do not leave the mm pointing to a possibly
485 * freed task structure. 485 * freed task structure.
486 */ 486 */
487 if (atomic_read(&mm->mm_users) <= 1) { 487 if (atomic_read(&mm->mm_users) <= 1) {
488 mm->owner = NULL; 488 mm->owner = NULL;
489 return; 489 return;
490 } 490 }
491 491
492 read_lock(&tasklist_lock); 492 read_lock(&tasklist_lock);
493 /* 493 /*
494 * Search in the children 494 * Search in the children
495 */ 495 */
496 list_for_each_entry(c, &p->children, sibling) { 496 list_for_each_entry(c, &p->children, sibling) {
497 if (c->mm == mm) 497 if (c->mm == mm)
498 goto assign_new_owner; 498 goto assign_new_owner;
499 } 499 }
500 500
501 /* 501 /*
502 * Search in the siblings 502 * Search in the siblings
503 */ 503 */
504 list_for_each_entry(c, &p->real_parent->children, sibling) { 504 list_for_each_entry(c, &p->real_parent->children, sibling) {
505 if (c->mm == mm) 505 if (c->mm == mm)
506 goto assign_new_owner; 506 goto assign_new_owner;
507 } 507 }
508 508
509 /* 509 /*
510 * Search through everything else. We should not get 510 * Search through everything else. We should not get
511 * here often 511 * here often
512 */ 512 */
513 do_each_thread(g, c) { 513 do_each_thread(g, c) {
514 if (c->mm == mm) 514 if (c->mm == mm)
515 goto assign_new_owner; 515 goto assign_new_owner;
516 } while_each_thread(g, c); 516 } while_each_thread(g, c);
517 517
518 read_unlock(&tasklist_lock); 518 read_unlock(&tasklist_lock);
519 /* 519 /*
520 * We found no owner, yet mm_users > 1: this implies that we are 520 * We found no owner, yet mm_users > 1: this implies that we are
521 * most likely racing with swapoff (try_to_unuse()) or /proc or 521 * most likely racing with swapoff (try_to_unuse()) or /proc or
522 * ptrace or page migration (get_task_mm()). Mark owner as NULL. 522 * ptrace or page migration (get_task_mm()). Mark owner as NULL.
523 */ 523 */
524 mm->owner = NULL; 524 mm->owner = NULL;
525 return; 525 return;
526 526
527 assign_new_owner: 527 assign_new_owner:
528 BUG_ON(c == p); 528 BUG_ON(c == p);
529 get_task_struct(c); 529 get_task_struct(c);
530 /* 530 /*
531 * The task_lock protects c->mm from changing. 531 * The task_lock protects c->mm from changing.
532 * We always want mm->owner->mm == mm 532 * We always want mm->owner->mm == mm
533 */ 533 */
534 task_lock(c); 534 task_lock(c);
535 /* 535 /*
536 * Delay read_unlock() till we have the task_lock() 536 * Delay read_unlock() till we have the task_lock()
537 * to ensure that c does not slip away underneath us 537 * to ensure that c does not slip away underneath us
538 */ 538 */
539 read_unlock(&tasklist_lock); 539 read_unlock(&tasklist_lock);
540 if (c->mm != mm) { 540 if (c->mm != mm) {
541 task_unlock(c); 541 task_unlock(c);
542 put_task_struct(c); 542 put_task_struct(c);
543 goto retry; 543 goto retry;
544 } 544 }
545 mm->owner = c; 545 mm->owner = c;
546 task_unlock(c); 546 task_unlock(c);
547 put_task_struct(c); 547 put_task_struct(c);
548 } 548 }
549 #endif /* CONFIG_MM_OWNER */ 549 #endif /* CONFIG_MM_OWNER */
550 550
551 /* 551 /*
552 * Turn us into a lazy TLB process if we 552 * Turn us into a lazy TLB process if we
553 * aren't already.. 553 * aren't already..
554 */ 554 */
555 static void exit_mm(struct task_struct * tsk) 555 static void exit_mm(struct task_struct * tsk)
556 { 556 {
557 struct mm_struct *mm = tsk->mm; 557 struct mm_struct *mm = tsk->mm;
558 struct core_state *core_state; 558 struct core_state *core_state;
559 559
560 mm_release(tsk, mm); 560 mm_release(tsk, mm);
561 if (!mm) 561 if (!mm)
562 return; 562 return;
563 sync_mm_rss(mm); 563 sync_mm_rss(mm);
564 /* 564 /*
565 * Serialize with any possible pending coredump. 565 * Serialize with any possible pending coredump.
566 * We must hold mmap_sem around checking core_state 566 * We must hold mmap_sem around checking core_state
567 * and clearing tsk->mm. The core-inducing thread 567 * and clearing tsk->mm. The core-inducing thread
568 * will increment ->nr_threads for each thread in the 568 * will increment ->nr_threads for each thread in the
569 * group with ->mm != NULL. 569 * group with ->mm != NULL.
570 */ 570 */
571 down_read(&mm->mmap_sem); 571 down_read(&mm->mmap_sem);
572 core_state = mm->core_state; 572 core_state = mm->core_state;
573 if (core_state) { 573 if (core_state) {
574 struct core_thread self; 574 struct core_thread self;
575 up_read(&mm->mmap_sem); 575 up_read(&mm->mmap_sem);
576 576
577 self.task = tsk; 577 self.task = tsk;
578 self.next = xchg(&core_state->dumper.next, &self); 578 self.next = xchg(&core_state->dumper.next, &self);
579 /* 579 /*
580 * Implies mb(), the result of xchg() must be visible 580 * Implies mb(), the result of xchg() must be visible
581 * to core_state->dumper. 581 * to core_state->dumper.
582 */ 582 */
583 if (atomic_dec_and_test(&core_state->nr_threads)) 583 if (atomic_dec_and_test(&core_state->nr_threads))
584 complete(&core_state->startup); 584 complete(&core_state->startup);
585 585
586 for (;;) { 586 for (;;) {
587 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 587 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
588 if (!self.task) /* see coredump_finish() */ 588 if (!self.task) /* see coredump_finish() */
589 break; 589 break;
590 schedule(); 590 schedule();
591 } 591 }
592 __set_task_state(tsk, TASK_RUNNING); 592 __set_task_state(tsk, TASK_RUNNING);
593 down_read(&mm->mmap_sem); 593 down_read(&mm->mmap_sem);
594 } 594 }
595 atomic_inc(&mm->mm_count); 595 atomic_inc(&mm->mm_count);
596 BUG_ON(mm != tsk->active_mm); 596 BUG_ON(mm != tsk->active_mm);
597 /* more a memory barrier than a real lock */ 597 /* more a memory barrier than a real lock */
598 task_lock(tsk); 598 task_lock(tsk);
599 tsk->mm = NULL; 599 tsk->mm = NULL;
600 up_read(&mm->mmap_sem); 600 up_read(&mm->mmap_sem);
601 enter_lazy_tlb(mm, current); 601 enter_lazy_tlb(mm, current);
602 task_unlock(tsk); 602 task_unlock(tsk);
603 mm_update_next_owner(mm); 603 mm_update_next_owner(mm);
604 mmput(mm); 604 mmput(mm);
605 } 605 }
606 606
607 /* 607 /*
608 * When we die, we re-parent all our children, and try to: 608 * When we die, we re-parent all our children, and try to:
609 * 1. give them to another thread in our thread group, if such a member exists 609 * 1. give them to another thread in our thread group, if such a member exists
610 * 2. give them to the first ancestor process which prctl'd itself as a 610 * 2. give them to the first ancestor process which prctl'd itself as a
611 * child_subreaper for its children (like a service manager) 611 * child_subreaper for its children (like a service manager)
612 * 3. give them to the init process (PID 1) in our pid namespace 612 * 3. give them to the init process (PID 1) in our pid namespace
613 */ 613 */
614 static struct task_struct *find_new_reaper(struct task_struct *father) 614 static struct task_struct *find_new_reaper(struct task_struct *father)
615 __releases(&tasklist_lock) 615 __releases(&tasklist_lock)
616 __acquires(&tasklist_lock) 616 __acquires(&tasklist_lock)
617 { 617 {
618 struct pid_namespace *pid_ns = task_active_pid_ns(father); 618 struct pid_namespace *pid_ns = task_active_pid_ns(father);
619 struct task_struct *thread; 619 struct task_struct *thread;
620 620
621 thread = father; 621 thread = father;
622 while_each_thread(father, thread) { 622 while_each_thread(father, thread) {
623 if (thread->flags & PF_EXITING) 623 if (thread->flags & PF_EXITING)
624 continue; 624 continue;
625 if (unlikely(pid_ns->child_reaper == father)) 625 if (unlikely(pid_ns->child_reaper == father))
626 pid_ns->child_reaper = thread; 626 pid_ns->child_reaper = thread;
627 return thread; 627 return thread;
628 } 628 }
629 629
630 if (unlikely(pid_ns->child_reaper == father)) { 630 if (unlikely(pid_ns->child_reaper == father)) {
631 write_unlock_irq(&tasklist_lock); 631 write_unlock_irq(&tasklist_lock);
632 if (unlikely(pid_ns == &init_pid_ns)) { 632 if (unlikely(pid_ns == &init_pid_ns)) {
633 panic("Attempted to kill init! exitcode=0x%08x\n", 633 panic("Attempted to kill init! exitcode=0x%08x\n",
634 father->signal->group_exit_code ?: 634 father->signal->group_exit_code ?:
635 father->exit_code); 635 father->exit_code);
636 } 636 }
637 637
638 zap_pid_ns_processes(pid_ns); 638 zap_pid_ns_processes(pid_ns);
639 write_lock_irq(&tasklist_lock); 639 write_lock_irq(&tasklist_lock);
640 } else if (father->signal->has_child_subreaper) { 640 } else if (father->signal->has_child_subreaper) {
641 struct task_struct *reaper; 641 struct task_struct *reaper;
642 642
643 /* 643 /*
644 * Find the first ancestor marked as child_subreaper. 644 * Find the first ancestor marked as child_subreaper.
645 * Note that the code below checks same_thread_group(reaper, 645 * Note that the code below checks same_thread_group(reaper,
646 * pid_ns->child_reaper). This is what we need to DTRT in a 646 * pid_ns->child_reaper). This is what we need to DTRT in a
647 * PID namespace. However we still need the check above, see 647 * PID namespace. However we still need the check above, see
648 * http://marc.info/?l=linux-kernel&m=131385460420380 648 * http://marc.info/?l=linux-kernel&m=131385460420380
649 */ 649 */
650 for (reaper = father->real_parent; 650 for (reaper = father->real_parent;
651 reaper != &init_task; 651 reaper != &init_task;
652 reaper = reaper->real_parent) { 652 reaper = reaper->real_parent) {
653 if (same_thread_group(reaper, pid_ns->child_reaper)) 653 if (same_thread_group(reaper, pid_ns->child_reaper))
654 break; 654 break;
655 if (!reaper->signal->is_child_subreaper) 655 if (!reaper->signal->is_child_subreaper)
656 continue; 656 continue;
657 thread = reaper; 657 thread = reaper;
658 do { 658 do {
659 if (!(thread->flags & PF_EXITING)) 659 if (!(thread->flags & PF_EXITING))
660 return reaper; 660 return reaper;
661 } while_each_thread(reaper, thread); 661 } while_each_thread(reaper, thread);
662 } 662 }
663 } 663 }
664 664
665 return pid_ns->child_reaper; 665 return pid_ns->child_reaper;
666 } 666 }
667 667
668 /* 668 /*
669 * Any that need to be release_task'd are put on the @dead list. 669 * Any that need to be release_task'd are put on the @dead list.
670 */ 670 */
671 static void reparent_leader(struct task_struct *father, struct task_struct *p, 671 static void reparent_leader(struct task_struct *father, struct task_struct *p,
672 struct list_head *dead) 672 struct list_head *dead)
673 { 673 {
674 list_move_tail(&p->sibling, &p->real_parent->children); 674 list_move_tail(&p->sibling, &p->real_parent->children);
675 675
676 if (p->exit_state == EXIT_DEAD) 676 if (p->exit_state == EXIT_DEAD)
677 return; 677 return;
678 /* 678 /*
679 * If this is a threaded reparent there is no need to 679 * If this is a threaded reparent there is no need to
680 * notify anyone anything has happened. 680 * notify anyone anything has happened.
681 */ 681 */
682 if (same_thread_group(p->real_parent, father)) 682 if (same_thread_group(p->real_parent, father))
683 return; 683 return;
684 684
685 /* We don't want people slaying init. */ 685 /* We don't want people slaying init. */
686 p->exit_signal = SIGCHLD; 686 p->exit_signal = SIGCHLD;
687 687
688 /* If it has exited notify the new parent about this child's death. */ 688 /* If it has exited notify the new parent about this child's death. */
689 if (!p->ptrace && 689 if (!p->ptrace &&
690 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { 690 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
691 if (do_notify_parent(p, p->exit_signal)) { 691 if (do_notify_parent(p, p->exit_signal)) {
692 p->exit_state = EXIT_DEAD; 692 p->exit_state = EXIT_DEAD;
693 list_move_tail(&p->sibling, dead); 693 list_move_tail(&p->sibling, dead);
694 } 694 }
695 } 695 }
696 696
697 kill_orphaned_pgrp(p, father); 697 kill_orphaned_pgrp(p, father);
698 } 698 }
699 699
700 static void forget_original_parent(struct task_struct *father) 700 static void forget_original_parent(struct task_struct *father)
701 { 701 {
702 struct task_struct *p, *n, *reaper; 702 struct task_struct *p, *n, *reaper;
703 LIST_HEAD(dead_children); 703 LIST_HEAD(dead_children);
704 704
705 write_lock_irq(&tasklist_lock); 705 write_lock_irq(&tasklist_lock);
706 /* 706 /*
707 * Note that exit_ptrace() and find_new_reaper() might 707 * Note that exit_ptrace() and find_new_reaper() might
708 * drop tasklist_lock and reacquire it. 708 * drop tasklist_lock and reacquire it.
709 */ 709 */
710 exit_ptrace(father); 710 exit_ptrace(father);
711 reaper = find_new_reaper(father); 711 reaper = find_new_reaper(father);
712 712
713 list_for_each_entry_safe(p, n, &father->children, sibling) { 713 list_for_each_entry_safe(p, n, &father->children, sibling) {
714 struct task_struct *t = p; 714 struct task_struct *t = p;
715 do { 715 do {
716 t->real_parent = reaper; 716 t->real_parent = reaper;
717 if (t->parent == father) { 717 if (t->parent == father) {
718 BUG_ON(t->ptrace); 718 BUG_ON(t->ptrace);
719 t->parent = t->real_parent; 719 t->parent = t->real_parent;
720 } 720 }
721 if (t->pdeath_signal) 721 if (t->pdeath_signal)
722 group_send_sig_info(t->pdeath_signal, 722 group_send_sig_info(t->pdeath_signal,
723 SEND_SIG_NOINFO, t); 723 SEND_SIG_NOINFO, t);
724 } while_each_thread(p, t); 724 } while_each_thread(p, t);
725 reparent_leader(father, p, &dead_children); 725 reparent_leader(father, p, &dead_children);
726 } 726 }
727 write_unlock_irq(&tasklist_lock); 727 write_unlock_irq(&tasklist_lock);
728 728
729 BUG_ON(!list_empty(&father->children)); 729 BUG_ON(!list_empty(&father->children));
730 730
731 list_for_each_entry_safe(p, n, &dead_children, sibling) { 731 list_for_each_entry_safe(p, n, &dead_children, sibling) {
732 list_del_init(&p->sibling); 732 list_del_init(&p->sibling);
733 release_task(p); 733 release_task(p);
734 } 734 }
735 } 735 }
736 736
737 /* 737 /*
738 * Send signals to all our closest relatives so that they know 738 * Send signals to all our closest relatives so that they know
739 * to properly mourn us.. 739 * to properly mourn us..
740 */ 740 */
741 static void exit_notify(struct task_struct *tsk, int group_dead) 741 static void exit_notify(struct task_struct *tsk, int group_dead)
742 { 742 {
743 bool autoreap; 743 bool autoreap;
744 744
745 /* 745 /*
746 * This does two things: 746 * This does two things:
747 * 747 *
748 * A. Make init inherit all the child processes 748 * A. Make init inherit all the child processes
749 * B. Check to see if any process groups have become orphaned 749 * B. Check to see if any process groups have become orphaned
750 * as a result of our exiting, and if they have any stopped 750 * as a result of our exiting, and if they have any stopped
751 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) 751 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
752 */ 752 */
753 forget_original_parent(tsk); 753 forget_original_parent(tsk);
754 exit_task_namespaces(tsk); 754 exit_task_namespaces(tsk);
755 755
756 write_lock_irq(&tasklist_lock); 756 write_lock_irq(&tasklist_lock);
757 if (group_dead) 757 if (group_dead)
758 kill_orphaned_pgrp(tsk->group_leader, NULL); 758 kill_orphaned_pgrp(tsk->group_leader, NULL);
759 759
760 if (unlikely(tsk->ptrace)) { 760 if (unlikely(tsk->ptrace)) {
761 int sig = thread_group_leader(tsk) && 761 int sig = thread_group_leader(tsk) &&
762 thread_group_empty(tsk) && 762 thread_group_empty(tsk) &&
763 !ptrace_reparented(tsk) ? 763 !ptrace_reparented(tsk) ?
764 tsk->exit_signal : SIGCHLD; 764 tsk->exit_signal : SIGCHLD;
765 autoreap = do_notify_parent(tsk, sig); 765 autoreap = do_notify_parent(tsk, sig);
766 } else if (thread_group_leader(tsk)) { 766 } else if (thread_group_leader(tsk)) {
767 autoreap = thread_group_empty(tsk) && 767 autoreap = thread_group_empty(tsk) &&
768 do_notify_parent(tsk, tsk->exit_signal); 768 do_notify_parent(tsk, tsk->exit_signal);
769 } else { 769 } else {
770 autoreap = true; 770 autoreap = true;
771 } 771 }
772 772
773 tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; 773 tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
774 774
775 /* mt-exec, de_thread() is waiting for group leader */ 775 /* mt-exec, de_thread() is waiting for group leader */
776 if (unlikely(tsk->signal->notify_count < 0)) 776 if (unlikely(tsk->signal->notify_count < 0))
777 wake_up_process(tsk->signal->group_exit_task); 777 wake_up_process(tsk->signal->group_exit_task);
778 write_unlock_irq(&tasklist_lock); 778 write_unlock_irq(&tasklist_lock);
779 779
780 /* If the process is dead, release it - nobody will wait for it */ 780 /* If the process is dead, release it - nobody will wait for it */
781 if (autoreap) 781 if (autoreap)
782 release_task(tsk); 782 release_task(tsk);
783 } 783 }
784 784
785 #ifdef CONFIG_DEBUG_STACK_USAGE 785 #ifdef CONFIG_DEBUG_STACK_USAGE
786 static void check_stack_usage(void) 786 static void check_stack_usage(void)
787 { 787 {
788 static DEFINE_SPINLOCK(low_water_lock); 788 static DEFINE_SPINLOCK(low_water_lock);
789 static int lowest_to_date = THREAD_SIZE; 789 static int lowest_to_date = THREAD_SIZE;
790 unsigned long free; 790 unsigned long free;
791 791
792 free = stack_not_used(current); 792 free = stack_not_used(current);
793 793
794 if (free >= lowest_to_date) 794 if (free >= lowest_to_date)
795 return; 795 return;
796 796
797 spin_lock(&low_water_lock); 797 spin_lock(&low_water_lock);
798 if (free < lowest_to_date) { 798 if (free < lowest_to_date) {
799 printk(KERN_WARNING "%s (%d) used greatest stack depth: " 799 printk(KERN_WARNING "%s (%d) used greatest stack depth: "
800 "%lu bytes left\n", 800 "%lu bytes left\n",
801 current->comm, task_pid_nr(current), free); 801 current->comm, task_pid_nr(current), free);
802 lowest_to_date = free; 802 lowest_to_date = free;
803 } 803 }
804 spin_unlock(&low_water_lock); 804 spin_unlock(&low_water_lock);
805 } 805 }
806 #else 806 #else
807 static inline void check_stack_usage(void) {} 807 static inline void check_stack_usage(void) {}
808 #endif 808 #endif
809 809
810 void do_exit(long code) 810 void do_exit(long code)
811 { 811 {
812 struct task_struct *tsk = current; 812 struct task_struct *tsk = current;
813 int group_dead; 813 int group_dead;
814 814
815 profile_task_exit(tsk); 815 profile_task_exit(tsk);
816 816
817 WARN_ON(blk_needs_flush_plug(tsk)); 817 WARN_ON(blk_needs_flush_plug(tsk));
818 818
819 if (unlikely(in_interrupt())) 819 if (unlikely(in_interrupt()))
820 panic("Aiee, killing interrupt handler!"); 820 panic("Aiee, killing interrupt handler!");
821 if (unlikely(!tsk->pid)) 821 if (unlikely(!tsk->pid))
822 panic("Attempted to kill the idle task!"); 822 panic("Attempted to kill the idle task!");
823 823
824 /* 824 /*
825 * If do_exit is called because this process oopsed, it's possible 825 * If do_exit is called because this process oopsed, it's possible
826 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before 826 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
827 * continuing. Amongst other possible reasons, this is to prevent 827 * continuing. Amongst other possible reasons, this is to prevent
828 * mm_release()->clear_child_tid() from writing to a user-controlled 828 * mm_release()->clear_child_tid() from writing to a user-controlled
829 * kernel address. 829 * kernel address.
830 */ 830 */
831 set_fs(USER_DS); 831 set_fs(USER_DS);
832 832
833 ptrace_event(PTRACE_EVENT_EXIT, code); 833 ptrace_event(PTRACE_EVENT_EXIT, code);
834 834
835 validate_creds_for_do_exit(tsk); 835 validate_creds_for_do_exit(tsk);
836 836
837 /* 837 /*
838 * We're taking recursive faults here in do_exit. Safest is to just 838 * We're taking recursive faults here in do_exit. Safest is to just
839 * leave this task alone and wait for reboot. 839 * leave this task alone and wait for reboot.
840 */ 840 */
841 if (unlikely(tsk->flags & PF_EXITING)) { 841 if (unlikely(tsk->flags & PF_EXITING)) {
842 printk(KERN_ALERT 842 printk(KERN_ALERT
843 "Fixing recursive fault but reboot is needed!\n"); 843 "Fixing recursive fault but reboot is needed!\n");
844 /* 844 /*
845 * We can do this unlocked here. The futex code uses 845 * We can do this unlocked here. The futex code uses
846 * this flag just to verify whether the pi state 846 * this flag just to verify whether the pi state
847 * cleanup has been done or not. In the worst case it 847 * cleanup has been done or not. In the worst case it
848 * loops once more. We pretend that the cleanup was 848 * loops once more. We pretend that the cleanup was
849 * done as there is no way to return. Either the 849 * done as there is no way to return. Either the
850 * OWNER_DIED bit is set by now or we push the blocked 850 * OWNER_DIED bit is set by now or we push the blocked
851 * task into the wait-forever nirvana as well. 851 * task into the wait-forever nirvana as well.
852 */ 852 */
853 tsk->flags |= PF_EXITPIDONE; 853 tsk->flags |= PF_EXITPIDONE;
854 set_current_state(TASK_UNINTERRUPTIBLE); 854 set_current_state(TASK_UNINTERRUPTIBLE);
855 schedule(); 855 schedule();
856 } 856 }
857 857
858 exit_signals(tsk); /* sets PF_EXITING */ 858 exit_signals(tsk); /* sets PF_EXITING */
859 /* 859 /*
860 * tsk->flags are checked in the futex code to protect against 860 * tsk->flags are checked in the futex code to protect against
861 * an exiting task cleaning up the robust pi futexes. 861 * an exiting task cleaning up the robust pi futexes.
862 */ 862 */
863 smp_mb(); 863 smp_mb();
864 raw_spin_unlock_wait(&tsk->pi_lock); 864 raw_spin_unlock_wait(&tsk->pi_lock);
865 865
866 if (unlikely(in_atomic())) 866 if (unlikely(in_atomic()))
867 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 867 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
868 current->comm, task_pid_nr(current), 868 current->comm, task_pid_nr(current),
869 preempt_count()); 869 preempt_count());
870 870
871 acct_update_integrals(tsk); 871 acct_update_integrals(tsk);
872 /* sync mm's RSS info before statistics gathering */ 872 /* sync mm's RSS info before statistics gathering */
873 if (tsk->mm) 873 if (tsk->mm)
874 sync_mm_rss(tsk->mm); 874 sync_mm_rss(tsk->mm);
875 group_dead = atomic_dec_and_test(&tsk->signal->live); 875 group_dead = atomic_dec_and_test(&tsk->signal->live);
876 if (group_dead) { 876 if (group_dead) {
877 hrtimer_cancel(&tsk->signal->real_timer); 877 hrtimer_cancel(&tsk->signal->real_timer);
878 exit_itimers(tsk->signal); 878 exit_itimers(tsk->signal);
879 if (tsk->mm) 879 if (tsk->mm)
880 setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); 880 setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
881 } 881 }
882 acct_collect(code, group_dead); 882 acct_collect(code, group_dead);
883 if (group_dead) 883 if (group_dead)
884 tty_audit_exit(); 884 tty_audit_exit();
885 audit_free(tsk); 885 audit_free(tsk);
886 886
887 tsk->exit_code = code; 887 tsk->exit_code = code;
888 taskstats_exit(tsk, group_dead); 888 taskstats_exit(tsk, group_dead);
889 889
890 exit_mm(tsk); 890 exit_mm(tsk);
891 891
892 if (group_dead) 892 if (group_dead)
893 acct_process(); 893 acct_process();
894 trace_sched_process_exit(tsk); 894 trace_sched_process_exit(tsk);
895 895
896 exit_sem(tsk); 896 exit_sem(tsk);
897 exit_shm(tsk); 897 exit_shm(tsk);
898 exit_files(tsk); 898 exit_files(tsk);
899 exit_fs(tsk); 899 exit_fs(tsk);
900 exit_task_work(tsk); 900 exit_task_work(tsk);
901 check_stack_usage(); 901 check_stack_usage();
902 exit_thread(); 902 exit_thread();
903 903
904 /* 904 /*
905 * Flush inherited counters to the parent - before the parent 905 * Flush inherited counters to the parent - before the parent
906 * gets woken up by child-exit notifications. 906 * gets woken up by child-exit notifications.
907 * 907 *
908 * because of cgroup mode, must be called before cgroup_exit() 908 * because of cgroup mode, must be called before cgroup_exit()
909 */ 909 */
910 perf_event_exit_task(tsk); 910 perf_event_exit_task(tsk);
911 911
912 cgroup_exit(tsk, 1); 912 cgroup_exit(tsk, 1);
913 913
914 if (group_dead) 914 if (group_dead)
915 disassociate_ctty(1); 915 disassociate_ctty(1);
916 916
917 module_put(task_thread_info(tsk)->exec_domain->module); 917 module_put(task_thread_info(tsk)->exec_domain->module);
918 918
919 proc_exit_connector(tsk); 919 proc_exit_connector(tsk);
920 920
921 /* 921 /*
922 * FIXME: do that only when needed, using sched_exit tracepoint 922 * FIXME: do that only when needed, using sched_exit tracepoint
923 */ 923 */
924 ptrace_put_breakpoints(tsk); 924 ptrace_put_breakpoints(tsk);
925 925
926 exit_notify(tsk, group_dead); 926 exit_notify(tsk, group_dead);
927 #ifdef CONFIG_NUMA 927 #ifdef CONFIG_NUMA
928 task_lock(tsk); 928 task_lock(tsk);
929 mpol_put(tsk->mempolicy); 929 mpol_put(tsk->mempolicy);
930 tsk->mempolicy = NULL; 930 tsk->mempolicy = NULL;
931 task_unlock(tsk); 931 task_unlock(tsk);
932 #endif 932 #endif
933 #ifdef CONFIG_FUTEX 933 #ifdef CONFIG_FUTEX
934 if (unlikely(current->pi_state_cache)) 934 if (unlikely(current->pi_state_cache))
935 kfree(current->pi_state_cache); 935 kfree(current->pi_state_cache);
936 #endif 936 #endif
937 /* 937 /*
938 * Make sure we are holding no locks: 938 * Make sure we are holding no locks:
939 */ 939 */
940 debug_check_no_locks_held(tsk); 940 debug_check_no_locks_held(tsk);
941 /* 941 /*
942 * We can do this unlocked here. The futex code uses this flag 942 * We can do this unlocked here. The futex code uses this flag
943 * just to verify whether the pi state cleanup has been done 943 * just to verify whether the pi state cleanup has been done
944 * or not. In the worst case it loops once more. 944 * or not. In the worst case it loops once more.
945 */ 945 */
946 tsk->flags |= PF_EXITPIDONE; 946 tsk->flags |= PF_EXITPIDONE;
947 947
948 if (tsk->io_context) 948 if (tsk->io_context)
949 exit_io_context(tsk); 949 exit_io_context(tsk);
950 950
951 if (tsk->splice_pipe) 951 if (tsk->splice_pipe)
952 __free_pipe_info(tsk->splice_pipe); 952 __free_pipe_info(tsk->splice_pipe);
953 953
954 if (tsk->task_frag.page) 954 if (tsk->task_frag.page)
955 put_page(tsk->task_frag.page); 955 put_page(tsk->task_frag.page);
956 956
957 validate_creds_for_do_exit(tsk); 957 validate_creds_for_do_exit(tsk);
958 958
959 preempt_disable(); 959 preempt_disable();
960 if (tsk->nr_dirtied) 960 if (tsk->nr_dirtied)
961 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 961 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
962 exit_rcu(); 962 exit_rcu();
963 963
964 /* 964 /*
965 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed 965 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
966 * when the following two conditions become true. 966 * when the following two conditions become true.
967 * - There is a race condition on mmap_sem (it is acquired by 967 * - There is a race condition on mmap_sem (it is acquired by
968 * exit_mm()), and 968 * exit_mm()), and
969 * - SMI occurs before setting TASK_RUNNING. 969 * - SMI occurs before setting TASK_RUNNING.
970 * (or the hypervisor of a virtual machine switches to another guest) 970 * (or the hypervisor of a virtual machine switches to another guest)
971 * As a result, we may become TASK_RUNNING after becoming TASK_DEAD 971 * As a result, we may become TASK_RUNNING after becoming TASK_DEAD
972 * 972 *
973 * To avoid this, we have to wait for tsk->pi_lock, which is 973 * To avoid this, we have to wait for tsk->pi_lock, which is
974 * held by try_to_wake_up(), to be released. 974 * held by try_to_wake_up(), to be released.
975 */ 975 */
976 smp_mb(); 976 smp_mb();
977 raw_spin_unlock_wait(&tsk->pi_lock); 977 raw_spin_unlock_wait(&tsk->pi_lock);
978 978
979 /* causes final put_task_struct in finish_task_switch(). */ 979 /* causes final put_task_struct in finish_task_switch(). */
980 tsk->state = TASK_DEAD; 980 tsk->state = TASK_DEAD;
981 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ 981 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
982 schedule(); 982 schedule();
983 BUG(); 983 BUG();
984 /* Avoid "noreturn function does return". */ 984 /* Avoid "noreturn function does return". */
985 for (;;) 985 for (;;)
986 cpu_relax(); /* For when BUG is null */ 986 cpu_relax(); /* For when BUG is null */
987 } 987 }
988 988
989 EXPORT_SYMBOL_GPL(do_exit); 989 EXPORT_SYMBOL_GPL(do_exit);
990 990
991 void complete_and_exit(struct completion *comp, long code) 991 void complete_and_exit(struct completion *comp, long code)
992 { 992 {
993 if (comp) 993 if (comp)
994 complete(comp); 994 complete(comp);
995 995
996 do_exit(code); 996 do_exit(code);
997 } 997 }
998 998
999 EXPORT_SYMBOL(complete_and_exit); 999 EXPORT_SYMBOL(complete_and_exit);
1000 1000
1001 SYSCALL_DEFINE1(exit, int, error_code) 1001 SYSCALL_DEFINE1(exit, int, error_code)
1002 { 1002 {
1003 do_exit((error_code&0xff)<<8); 1003 do_exit((error_code&0xff)<<8);
1004 } 1004 }
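The "(error_code & 0xff) << 8" packing above is exactly what the standard wait-status macros undo on the other side. A minimal user-space sketch (not part of this diff; it assumes the usual <sys/wait.h> macros):

#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0)
		_exit(300);		/* only the low byte survives: 300 & 0xff == 44 */

	waitpid(pid, &status, 0);
	if (WIFEXITED(status))		/* normal exit: low 7 bits of status are 0 */
		printf("exit status: %d\n", WEXITSTATUS(status));	/* prints 44 */
	return 0;
}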
1005 1005
1006 /* 1006 /*
1007 * Take down every thread in the group. This is called by fatal signals 1007 * Take down every thread in the group. This is called by fatal signals
1008 * as well as by sys_exit_group (below). 1008 * as well as by sys_exit_group (below).
1009 */ 1009 */
1010 void 1010 void
1011 do_group_exit(int exit_code) 1011 do_group_exit(int exit_code)
1012 { 1012 {
1013 struct signal_struct *sig = current->signal; 1013 struct signal_struct *sig = current->signal;
1014 1014
1015 BUG_ON(exit_code & 0x80); /* core dumps don't get here */ 1015 BUG_ON(exit_code & 0x80); /* core dumps don't get here */
1016 1016
1017 if (signal_group_exit(sig)) 1017 if (signal_group_exit(sig))
1018 exit_code = sig->group_exit_code; 1018 exit_code = sig->group_exit_code;
1019 else if (!thread_group_empty(current)) { 1019 else if (!thread_group_empty(current)) {
1020 struct sighand_struct *const sighand = current->sighand; 1020 struct sighand_struct *const sighand = current->sighand;
1021 spin_lock_irq(&sighand->siglock); 1021 spin_lock_irq(&sighand->siglock);
1022 if (signal_group_exit(sig)) 1022 if (signal_group_exit(sig))
1023 /* Another thread got here before we took the lock. */ 1023 /* Another thread got here before we took the lock. */
1024 exit_code = sig->group_exit_code; 1024 exit_code = sig->group_exit_code;
1025 else { 1025 else {
1026 sig->group_exit_code = exit_code; 1026 sig->group_exit_code = exit_code;
1027 sig->flags = SIGNAL_GROUP_EXIT; 1027 sig->flags = SIGNAL_GROUP_EXIT;
1028 zap_other_threads(current); 1028 zap_other_threads(current);
1029 } 1029 }
1030 spin_unlock_irq(&sighand->siglock); 1030 spin_unlock_irq(&sighand->siglock);
1031 } 1031 }
1032 1032
1033 do_exit(exit_code); 1033 do_exit(exit_code);
1034 /* NOTREACHED */ 1034 /* NOTREACHED */
1035 } 1035 }
1036 1036
1037 /* 1037 /*
1038 * this kills every thread in the thread group. Note that any externally 1038 * this kills every thread in the thread group. Note that any externally
1039 * wait4()-ing process will get the correct exit code - even if this 1039 * wait4()-ing process will get the correct exit code - even if this
1040 * thread is not the thread group leader. 1040 * thread is not the thread group leader.
1041 */ 1041 */
1042 SYSCALL_DEFINE1(exit_group, int, error_code) 1042 SYSCALL_DEFINE1(exit_group, int, error_code)
1043 { 1043 {
1044 do_group_exit((error_code & 0xff) << 8); 1044 do_group_exit((error_code & 0xff) << 8);
1045 /* NOTREACHED */ 1045 /* NOTREACHED */
1046 return 0; 1046 return 0;
1047 } 1047 }
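Since glibc's exit() ends up in sys_exit_group(), a call from any thread takes the whole group down with one shared exit code. A hedged user-space illustration (assumes POSIX threads, build with -pthread; not part of this diff):

#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

static void *worker(void *arg)
{
	(void)arg;
	sleep(1);
	exit(3);		/* libc exit() -> exit_group(): zaps every thread */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	for (;;)
		pause();	/* main never returns; a wait4()-ing parent sees exit code 3 */
}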
1048 1048
1049 struct wait_opts { 1049 struct wait_opts {
1050 enum pid_type wo_type; 1050 enum pid_type wo_type;
1051 int wo_flags; 1051 int wo_flags;
1052 struct pid *wo_pid; 1052 struct pid *wo_pid;
1053 1053
1054 struct siginfo __user *wo_info; 1054 struct siginfo __user *wo_info;
1055 int __user *wo_stat; 1055 int __user *wo_stat;
1056 struct rusage __user *wo_rusage; 1056 struct rusage __user *wo_rusage;
1057 1057
1058 wait_queue_t child_wait; 1058 wait_queue_t child_wait;
1059 int notask_error; 1059 int notask_error;
1060 }; 1060 };
1061 1061
1062 static inline 1062 static inline
1063 struct pid *task_pid_type(struct task_struct *task, enum pid_type type) 1063 struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1064 { 1064 {
1065 if (type != PIDTYPE_PID) 1065 if (type != PIDTYPE_PID)
1066 task = task->group_leader; 1066 task = task->group_leader;
1067 return task->pids[type].pid; 1067 return task->pids[type].pid;
1068 } 1068 }
1069 1069
1070 static int eligible_pid(struct wait_opts *wo, struct task_struct *p) 1070 static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
1071 { 1071 {
1072 return wo->wo_type == PIDTYPE_MAX || 1072 return wo->wo_type == PIDTYPE_MAX ||
1073 task_pid_type(p, wo->wo_type) == wo->wo_pid; 1073 task_pid_type(p, wo->wo_type) == wo->wo_pid;
1074 } 1074 }
1075 1075
1076 static int eligible_child(struct wait_opts *wo, struct task_struct *p) 1076 static int eligible_child(struct wait_opts *wo, struct task_struct *p)
1077 { 1077 {
1078 if (!eligible_pid(wo, p)) 1078 if (!eligible_pid(wo, p))
1079 return 0; 1079 return 0;
1080 /* Wait for all children (clone and not) if __WALL is set; 1080 /* Wait for all children (clone and not) if __WALL is set;
1081 * otherwise, wait for clone children *only* if __WCLONE is 1081 * otherwise, wait for clone children *only* if __WCLONE is
1082 * set; otherwise, wait for non-clone children *only*. (Note: 1082 * set; otherwise, wait for non-clone children *only*. (Note:
1083 * A "clone" child here is one that reports to its parent 1083 * A "clone" child here is one that reports to its parent
1084 * using a signal other than SIGCHLD.) */ 1084 * using a signal other than SIGCHLD.) */
1085 if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) 1085 if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
1086 && !(wo->wo_flags & __WALL)) 1086 && !(wo->wo_flags & __WALL))
1087 return 0; 1087 return 0;
1088 1088
1089 return 1; 1089 return 1;
1090 } 1090 }
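The XOR test above can be hard to read at a glance. An equivalent stand-alone restatement (a sketch only; the __WCLONE/__WALL values are the ones from the Linux UAPI wait headers):

#include <stdbool.h>
#include <signal.h>

#define __WCLONE	0x80000000	/* wait for "clone" children only */
#define __WALL		0x40000000	/* wait for any child */

/* A child is eligible when __WALL is set, or when "reports with a signal
 * other than SIGCHLD" matches "caller passed __WCLONE". */
static bool wait_flags_match(int child_exit_signal, unsigned int wo_flags)
{
	if (wo_flags & __WALL)
		return true;
	if (wo_flags & __WCLONE)
		return child_exit_signal != SIGCHLD;
	return child_exit_signal == SIGCHLD;
}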
1091 1091
1092 static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, 1092 static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
1093 pid_t pid, uid_t uid, int why, int status) 1093 pid_t pid, uid_t uid, int why, int status)
1094 { 1094 {
1095 struct siginfo __user *infop; 1095 struct siginfo __user *infop;
1096 int retval = wo->wo_rusage 1096 int retval = wo->wo_rusage
1097 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; 1097 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1098 1098
1099 put_task_struct(p); 1099 put_task_struct(p);
1100 infop = wo->wo_info; 1100 infop = wo->wo_info;
1101 if (infop) { 1101 if (infop) {
1102 if (!retval) 1102 if (!retval)
1103 retval = put_user(SIGCHLD, &infop->si_signo); 1103 retval = put_user(SIGCHLD, &infop->si_signo);
1104 if (!retval) 1104 if (!retval)
1105 retval = put_user(0, &infop->si_errno); 1105 retval = put_user(0, &infop->si_errno);
1106 if (!retval) 1106 if (!retval)
1107 retval = put_user((short)why, &infop->si_code); 1107 retval = put_user((short)why, &infop->si_code);
1108 if (!retval) 1108 if (!retval)
1109 retval = put_user(pid, &infop->si_pid); 1109 retval = put_user(pid, &infop->si_pid);
1110 if (!retval) 1110 if (!retval)
1111 retval = put_user(uid, &infop->si_uid); 1111 retval = put_user(uid, &infop->si_uid);
1112 if (!retval) 1112 if (!retval)
1113 retval = put_user(status, &infop->si_status); 1113 retval = put_user(status, &infop->si_status);
1114 } 1114 }
1115 if (!retval) 1115 if (!retval)
1116 retval = pid; 1116 retval = pid;
1117 return retval; 1117 return retval;
1118 } 1118 }
1119 1119
1120 /* 1120 /*
1121 * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold 1121 * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold
1122 * read_lock(&tasklist_lock) on entry. If we return zero, we still hold 1122 * read_lock(&tasklist_lock) on entry. If we return zero, we still hold
1123 * the lock and this task is uninteresting. If we return nonzero, we have 1123 * the lock and this task is uninteresting. If we return nonzero, we have
1124 * released the lock and the system call should return. 1124 * released the lock and the system call should return.
1125 */ 1125 */
1126 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) 1126 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1127 { 1127 {
1128 unsigned long state; 1128 unsigned long state;
1129 int retval, status, traced; 1129 int retval, status, traced;
1130 pid_t pid = task_pid_vnr(p); 1130 pid_t pid = task_pid_vnr(p);
1131 uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); 1131 uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
1132 struct siginfo __user *infop; 1132 struct siginfo __user *infop;
1133 1133
1134 if (!likely(wo->wo_flags & WEXITED)) 1134 if (!likely(wo->wo_flags & WEXITED))
1135 return 0; 1135 return 0;
1136 1136
1137 if (unlikely(wo->wo_flags & WNOWAIT)) { 1137 if (unlikely(wo->wo_flags & WNOWAIT)) {
1138 int exit_code = p->exit_code; 1138 int exit_code = p->exit_code;
1139 int why; 1139 int why;
1140 1140
1141 get_task_struct(p); 1141 get_task_struct(p);
1142 read_unlock(&tasklist_lock); 1142 read_unlock(&tasklist_lock);
1143 if ((exit_code & 0x7f) == 0) { 1143 if ((exit_code & 0x7f) == 0) {
1144 why = CLD_EXITED; 1144 why = CLD_EXITED;
1145 status = exit_code >> 8; 1145 status = exit_code >> 8;
1146 } else { 1146 } else {
1147 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; 1147 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
1148 status = exit_code & 0x7f; 1148 status = exit_code & 0x7f;
1149 } 1149 }
1150 return wait_noreap_copyout(wo, p, pid, uid, why, status); 1150 return wait_noreap_copyout(wo, p, pid, uid, why, status);
1151 } 1151 }
1152 1152
1153 /* 1153 /*
1154 * Try to move the task's state to DEAD 1154 * Try to move the task's state to DEAD
1155 * only one thread is allowed to do this: 1155 * only one thread is allowed to do this:
1156 */ 1156 */
1157 state = xchg(&p->exit_state, EXIT_DEAD); 1157 state = xchg(&p->exit_state, EXIT_DEAD);
1158 if (state != EXIT_ZOMBIE) { 1158 if (state != EXIT_ZOMBIE) {
1159 BUG_ON(state != EXIT_DEAD); 1159 BUG_ON(state != EXIT_DEAD);
1160 return 0; 1160 return 0;
1161 } 1161 }
1162 1162
1163 traced = ptrace_reparented(p); 1163 traced = ptrace_reparented(p);
1164 /* 1164 /*
1165 * It can be ptraced but not reparented, check 1165 * It can be ptraced but not reparented, check
1166 * thread_group_leader() to filter out sub-threads. 1166 * thread_group_leader() to filter out sub-threads.
1167 */ 1167 */
1168 if (likely(!traced) && thread_group_leader(p)) { 1168 if (likely(!traced) && thread_group_leader(p)) {
1169 struct signal_struct *psig; 1169 struct signal_struct *psig;
1170 struct signal_struct *sig; 1170 struct signal_struct *sig;
1171 unsigned long maxrss; 1171 unsigned long maxrss;
1172 cputime_t tgutime, tgstime; 1172 cputime_t tgutime, tgstime;
1173 1173
1174 /* 1174 /*
1175 * The resource counters for the group leader are in its 1175 * The resource counters for the group leader are in its
1176 * own task_struct. Those for dead threads in the group 1176 * own task_struct. Those for dead threads in the group
1177 * are in its signal_struct, as are those for the child 1177 * are in its signal_struct, as are those for the child
1178 * processes it has previously reaped. All these 1178 * processes it has previously reaped. All these
1179 * accumulate in the parent's signal_struct c* fields. 1179 * accumulate in the parent's signal_struct c* fields.
1180 * 1180 *
1181 * We don't bother to take a lock here to protect these 1181 * We don't bother to take a lock here to protect these
1182 * p->signal fields, because they are only touched by 1182 * p->signal fields, because they are only touched by
1183 * __exit_signal, which runs with tasklist_lock 1183 * __exit_signal, which runs with tasklist_lock
1184 * write-locked anyway, and so is excluded here. We do 1184 * write-locked anyway, and so is excluded here. We do
1185 * need to protect the access to parent->signal fields, 1185 * need to protect the access to parent->signal fields,
1186 * as other threads in the parent group can be right 1186 * as other threads in the parent group can be right
1187 * here reaping other children at the same time. 1187 * here reaping other children at the same time.
1188 * 1188 *
1189 * We use thread_group_times() to get times for the thread 1189 * We use thread_group_cputime_adjusted() to get times for the thread
1190 * group, which consolidates times for all threads in the 1190 * group, which consolidates times for all threads in the
1191 * group including the group leader. 1191 * group including the group leader.
1192 */ 1192 */
1193 thread_group_times(p, &tgutime, &tgstime); 1193 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1194 spin_lock_irq(&p->real_parent->sighand->siglock); 1194 spin_lock_irq(&p->real_parent->sighand->siglock);
1195 psig = p->real_parent->signal; 1195 psig = p->real_parent->signal;
1196 sig = p->signal; 1196 sig = p->signal;
1197 psig->cutime += tgutime + sig->cutime; 1197 psig->cutime += tgutime + sig->cutime;
1198 psig->cstime += tgstime + sig->cstime; 1198 psig->cstime += tgstime + sig->cstime;
1199 psig->cgtime += p->gtime + sig->gtime + sig->cgtime; 1199 psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
1200 psig->cmin_flt += 1200 psig->cmin_flt +=
1201 p->min_flt + sig->min_flt + sig->cmin_flt; 1201 p->min_flt + sig->min_flt + sig->cmin_flt;
1202 psig->cmaj_flt += 1202 psig->cmaj_flt +=
1203 p->maj_flt + sig->maj_flt + sig->cmaj_flt; 1203 p->maj_flt + sig->maj_flt + sig->cmaj_flt;
1204 psig->cnvcsw += 1204 psig->cnvcsw +=
1205 p->nvcsw + sig->nvcsw + sig->cnvcsw; 1205 p->nvcsw + sig->nvcsw + sig->cnvcsw;
1206 psig->cnivcsw += 1206 psig->cnivcsw +=
1207 p->nivcsw + sig->nivcsw + sig->cnivcsw; 1207 p->nivcsw + sig->nivcsw + sig->cnivcsw;
1208 psig->cinblock += 1208 psig->cinblock +=
1209 task_io_get_inblock(p) + 1209 task_io_get_inblock(p) +
1210 sig->inblock + sig->cinblock; 1210 sig->inblock + sig->cinblock;
1211 psig->coublock += 1211 psig->coublock +=
1212 task_io_get_oublock(p) + 1212 task_io_get_oublock(p) +
1213 sig->oublock + sig->coublock; 1213 sig->oublock + sig->coublock;
1214 maxrss = max(sig->maxrss, sig->cmaxrss); 1214 maxrss = max(sig->maxrss, sig->cmaxrss);
1215 if (psig->cmaxrss < maxrss) 1215 if (psig->cmaxrss < maxrss)
1216 psig->cmaxrss = maxrss; 1216 psig->cmaxrss = maxrss;
1217 task_io_accounting_add(&psig->ioac, &p->ioac); 1217 task_io_accounting_add(&psig->ioac, &p->ioac);
1218 task_io_accounting_add(&psig->ioac, &sig->ioac); 1218 task_io_accounting_add(&psig->ioac, &sig->ioac);
1219 spin_unlock_irq(&p->real_parent->sighand->siglock); 1219 spin_unlock_irq(&p->real_parent->sighand->siglock);
1220 } 1220 }
1221 1221
1222 /* 1222 /*
1223 * Now we are sure this task is interesting, and no other 1223 * Now we are sure this task is interesting, and no other
1224 * thread can reap it because we set its state to EXIT_DEAD. 1224 * thread can reap it because we set its state to EXIT_DEAD.
1225 */ 1225 */
1226 read_unlock(&tasklist_lock); 1226 read_unlock(&tasklist_lock);
1227 1227
1228 retval = wo->wo_rusage 1228 retval = wo->wo_rusage
1229 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; 1229 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1230 status = (p->signal->flags & SIGNAL_GROUP_EXIT) 1230 status = (p->signal->flags & SIGNAL_GROUP_EXIT)
1231 ? p->signal->group_exit_code : p->exit_code; 1231 ? p->signal->group_exit_code : p->exit_code;
1232 if (!retval && wo->wo_stat) 1232 if (!retval && wo->wo_stat)
1233 retval = put_user(status, wo->wo_stat); 1233 retval = put_user(status, wo->wo_stat);
1234 1234
1235 infop = wo->wo_info; 1235 infop = wo->wo_info;
1236 if (!retval && infop) 1236 if (!retval && infop)
1237 retval = put_user(SIGCHLD, &infop->si_signo); 1237 retval = put_user(SIGCHLD, &infop->si_signo);
1238 if (!retval && infop) 1238 if (!retval && infop)
1239 retval = put_user(0, &infop->si_errno); 1239 retval = put_user(0, &infop->si_errno);
1240 if (!retval && infop) { 1240 if (!retval && infop) {
1241 int why; 1241 int why;
1242 1242
1243 if ((status & 0x7f) == 0) { 1243 if ((status & 0x7f) == 0) {
1244 why = CLD_EXITED; 1244 why = CLD_EXITED;
1245 status >>= 8; 1245 status >>= 8;
1246 } else { 1246 } else {
1247 why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; 1247 why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
1248 status &= 0x7f; 1248 status &= 0x7f;
1249 } 1249 }
1250 retval = put_user((short)why, &infop->si_code); 1250 retval = put_user((short)why, &infop->si_code);
1251 if (!retval) 1251 if (!retval)
1252 retval = put_user(status, &infop->si_status); 1252 retval = put_user(status, &infop->si_status);
1253 } 1253 }
1254 if (!retval && infop) 1254 if (!retval && infop)
1255 retval = put_user(pid, &infop->si_pid); 1255 retval = put_user(pid, &infop->si_pid);
1256 if (!retval && infop) 1256 if (!retval && infop)
1257 retval = put_user(uid, &infop->si_uid); 1257 retval = put_user(uid, &infop->si_uid);
1258 if (!retval) 1258 if (!retval)
1259 retval = pid; 1259 retval = pid;
1260 1260
1261 if (traced) { 1261 if (traced) {
1262 write_lock_irq(&tasklist_lock); 1262 write_lock_irq(&tasklist_lock);
1263 /* We dropped tasklist, ptracer could die and untrace */ 1263 /* We dropped tasklist, ptracer could die and untrace */
1264 ptrace_unlink(p); 1264 ptrace_unlink(p);
1265 /* 1265 /*
1266 * If this is not a sub-thread, notify the parent. 1266 * If this is not a sub-thread, notify the parent.
1267 * If parent wants a zombie, don't release it now. 1267 * If parent wants a zombie, don't release it now.
1268 */ 1268 */
1269 if (thread_group_leader(p) && 1269 if (thread_group_leader(p) &&
1270 !do_notify_parent(p, p->exit_signal)) { 1270 !do_notify_parent(p, p->exit_signal)) {
1271 p->exit_state = EXIT_ZOMBIE; 1271 p->exit_state = EXIT_ZOMBIE;
1272 p = NULL; 1272 p = NULL;
1273 } 1273 }
1274 write_unlock_irq(&tasklist_lock); 1274 write_unlock_irq(&tasklist_lock);
1275 } 1275 }
1276 if (p != NULL) 1276 if (p != NULL)
1277 release_task(p); 1277 release_task(p);
1278 1278
1279 return retval; 1279 return retval;
1280 } 1280 }
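The tgutime/tgstime that thread_group_cputime_adjusted() returns above are folded into the parent's cutime/cstime while the zombie is reaped, which is what RUSAGE_CHILDREN later reports. A small user-space sketch of that round trip (assumptions: standard getrusage()/waitpid(); not part of this diff):

#include <stdio.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	struct rusage ru;
	pid_t pid = fork();

	if (pid == 0) {
		for (volatile long i = 0; i < 200000000L; i++)
			;			/* burn some user CPU time */
		_exit(0);
	}

	waitpid(pid, &status, 0);		/* reaping folds the child's times into cutime/cstime */
	getrusage(RUSAGE_CHILDREN, &ru);	/* reads back those accumulated totals */
	printf("children utime: %ld.%06ld s\n",
	       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec);
	return 0;
}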
1281 1281
1282 static int *task_stopped_code(struct task_struct *p, bool ptrace) 1282 static int *task_stopped_code(struct task_struct *p, bool ptrace)
1283 { 1283 {
1284 if (ptrace) { 1284 if (ptrace) {
1285 if (task_is_stopped_or_traced(p) && 1285 if (task_is_stopped_or_traced(p) &&
1286 !(p->jobctl & JOBCTL_LISTENING)) 1286 !(p->jobctl & JOBCTL_LISTENING))
1287 return &p->exit_code; 1287 return &p->exit_code;
1288 } else { 1288 } else {
1289 if (p->signal->flags & SIGNAL_STOP_STOPPED) 1289 if (p->signal->flags & SIGNAL_STOP_STOPPED)
1290 return &p->signal->group_exit_code; 1290 return &p->signal->group_exit_code;
1291 } 1291 }
1292 return NULL; 1292 return NULL;
1293 } 1293 }
1294 1294
1295 /** 1295 /**
1296 * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED 1296 * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
1297 * @wo: wait options 1297 * @wo: wait options
1298 * @ptrace: is the wait for ptrace 1298 * @ptrace: is the wait for ptrace
1299 * @p: task to wait for 1299 * @p: task to wait for
1300 * 1300 *
1301 * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. 1301 * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
1302 * 1302 *
1303 * CONTEXT: 1303 * CONTEXT:
1304 * read_lock(&tasklist_lock), which is released if return value is 1304 * read_lock(&tasklist_lock), which is released if return value is
1305 * non-zero. Also, grabs and releases @p->sighand->siglock. 1305 * non-zero. Also, grabs and releases @p->sighand->siglock.
1306 * 1306 *
1307 * RETURNS: 1307 * RETURNS:
1308 * 0 if wait condition didn't exist and search for other wait conditions 1308 * 0 if wait condition didn't exist and search for other wait conditions
1309 * should continue. Non-zero return, -errno on failure and @p's pid on 1309 * should continue. Non-zero return, -errno on failure and @p's pid on
1310 * success, implies that tasklist_lock is released and wait condition 1310 * success, implies that tasklist_lock is released and wait condition
1311 * search should terminate. 1311 * search should terminate.
1312 */ 1312 */
1313 static int wait_task_stopped(struct wait_opts *wo, 1313 static int wait_task_stopped(struct wait_opts *wo,
1314 int ptrace, struct task_struct *p) 1314 int ptrace, struct task_struct *p)
1315 { 1315 {
1316 struct siginfo __user *infop; 1316 struct siginfo __user *infop;
1317 int retval, exit_code, *p_code, why; 1317 int retval, exit_code, *p_code, why;
1318 uid_t uid = 0; /* unneeded, required by compiler */ 1318 uid_t uid = 0; /* unneeded, required by compiler */
1319 pid_t pid; 1319 pid_t pid;
1320 1320
1321 /* 1321 /*
1322 * Traditionally we see ptrace'd stopped tasks regardless of options. 1322 * Traditionally we see ptrace'd stopped tasks regardless of options.
1323 */ 1323 */
1324 if (!ptrace && !(wo->wo_flags & WUNTRACED)) 1324 if (!ptrace && !(wo->wo_flags & WUNTRACED))
1325 return 0; 1325 return 0;
1326 1326
1327 if (!task_stopped_code(p, ptrace)) 1327 if (!task_stopped_code(p, ptrace))
1328 return 0; 1328 return 0;
1329 1329
1330 exit_code = 0; 1330 exit_code = 0;
1331 spin_lock_irq(&p->sighand->siglock); 1331 spin_lock_irq(&p->sighand->siglock);
1332 1332
1333 p_code = task_stopped_code(p, ptrace); 1333 p_code = task_stopped_code(p, ptrace);
1334 if (unlikely(!p_code)) 1334 if (unlikely(!p_code))
1335 goto unlock_sig; 1335 goto unlock_sig;
1336 1336
1337 exit_code = *p_code; 1337 exit_code = *p_code;
1338 if (!exit_code) 1338 if (!exit_code)
1339 goto unlock_sig; 1339 goto unlock_sig;
1340 1340
1341 if (!unlikely(wo->wo_flags & WNOWAIT)) 1341 if (!unlikely(wo->wo_flags & WNOWAIT))
1342 *p_code = 0; 1342 *p_code = 0;
1343 1343
1344 uid = from_kuid_munged(current_user_ns(), task_uid(p)); 1344 uid = from_kuid_munged(current_user_ns(), task_uid(p));
1345 unlock_sig: 1345 unlock_sig:
1346 spin_unlock_irq(&p->sighand->siglock); 1346 spin_unlock_irq(&p->sighand->siglock);
1347 if (!exit_code) 1347 if (!exit_code)
1348 return 0; 1348 return 0;
1349 1349
1350 /* 1350 /*
1351 * Now we are pretty sure this task is interesting. 1351 * Now we are pretty sure this task is interesting.
1352 * Make sure it doesn't get reaped out from under us while we 1352 * Make sure it doesn't get reaped out from under us while we
1353 * give up the lock and then examine it below. We don't want to 1353 * give up the lock and then examine it below. We don't want to
1354 * keep holding onto the tasklist_lock while we call getrusage and 1354 * keep holding onto the tasklist_lock while we call getrusage and
1355 * possibly take page faults for user memory. 1355 * possibly take page faults for user memory.
1356 */ 1356 */
1357 get_task_struct(p); 1357 get_task_struct(p);
1358 pid = task_pid_vnr(p); 1358 pid = task_pid_vnr(p);
1359 why = ptrace ? CLD_TRAPPED : CLD_STOPPED; 1359 why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
1360 read_unlock(&tasklist_lock); 1360 read_unlock(&tasklist_lock);
1361 1361
1362 if (unlikely(wo->wo_flags & WNOWAIT)) 1362 if (unlikely(wo->wo_flags & WNOWAIT))
1363 return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); 1363 return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
1364 1364
1365 retval = wo->wo_rusage 1365 retval = wo->wo_rusage
1366 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; 1366 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1367 if (!retval && wo->wo_stat) 1367 if (!retval && wo->wo_stat)
1368 retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat); 1368 retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
1369 1369
1370 infop = wo->wo_info; 1370 infop = wo->wo_info;
1371 if (!retval && infop) 1371 if (!retval && infop)
1372 retval = put_user(SIGCHLD, &infop->si_signo); 1372 retval = put_user(SIGCHLD, &infop->si_signo);
1373 if (!retval && infop) 1373 if (!retval && infop)
1374 retval = put_user(0, &infop->si_errno); 1374 retval = put_user(0, &infop->si_errno);
1375 if (!retval && infop) 1375 if (!retval && infop)
1376 retval = put_user((short)why, &infop->si_code); 1376 retval = put_user((short)why, &infop->si_code);
1377 if (!retval && infop) 1377 if (!retval && infop)
1378 retval = put_user(exit_code, &infop->si_status); 1378 retval = put_user(exit_code, &infop->si_status);
1379 if (!retval && infop) 1379 if (!retval && infop)
1380 retval = put_user(pid, &infop->si_pid); 1380 retval = put_user(pid, &infop->si_pid);
1381 if (!retval && infop) 1381 if (!retval && infop)
1382 retval = put_user(uid, &infop->si_uid); 1382 retval = put_user(uid, &infop->si_uid);
1383 if (!retval) 1383 if (!retval)
1384 retval = pid; 1384 retval = pid;
1385 put_task_struct(p); 1385 put_task_struct(p);
1386 1386
1387 BUG_ON(!retval); 1387 BUG_ON(!retval);
1388 return retval; 1388 return retval;
1389 } 1389 }
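The "(exit_code << 8) | 0x7f" value stored above is what WIFSTOPPED()/WSTOPSIG() decode. A hedged user-space sketch (assumes the usual <sys/wait.h> macros; not part of this diff):

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0) {
		raise(SIGSTOP);			/* stop ourselves */
		_exit(0);
	}

	waitpid(pid, &status, WUNTRACED);	/* sees the stop: (SIGSTOP << 8) | 0x7f */
	if (WIFSTOPPED(status))			/* true iff the low byte is 0x7f */
		printf("stopped by signal %d\n", WSTOPSIG(status));

	kill(pid, SIGCONT);
	waitpid(pid, &status, 0);		/* reap the eventual exit */
	return 0;
}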
1390 1390
1391 /* 1391 /*
1392 * Handle do_wait work for one task in a live, non-stopped state. 1392 * Handle do_wait work for one task in a live, non-stopped state.
1393 * read_lock(&tasklist_lock) on entry. If we return zero, we still hold 1393 * read_lock(&tasklist_lock) on entry. If we return zero, we still hold
1394 * the lock and this task is uninteresting. If we return nonzero, we have 1394 * the lock and this task is uninteresting. If we return nonzero, we have
1395 * released the lock and the system call should return. 1395 * released the lock and the system call should return.
1396 */ 1396 */
1397 static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) 1397 static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1398 { 1398 {
1399 int retval; 1399 int retval;
1400 pid_t pid; 1400 pid_t pid;
1401 uid_t uid; 1401 uid_t uid;
1402 1402
1403 if (!unlikely(wo->wo_flags & WCONTINUED)) 1403 if (!unlikely(wo->wo_flags & WCONTINUED))
1404 return 0; 1404 return 0;
1405 1405
1406 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1406 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
1407 return 0; 1407 return 0;
1408 1408
1409 spin_lock_irq(&p->sighand->siglock); 1409 spin_lock_irq(&p->sighand->siglock);
1410 /* Re-check with the lock held. */ 1410 /* Re-check with the lock held. */
1411 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { 1411 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
1412 spin_unlock_irq(&p->sighand->siglock); 1412 spin_unlock_irq(&p->sighand->siglock);
1413 return 0; 1413 return 0;
1414 } 1414 }
1415 if (!unlikely(wo->wo_flags & WNOWAIT)) 1415 if (!unlikely(wo->wo_flags & WNOWAIT))
1416 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1416 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1417 uid = from_kuid_munged(current_user_ns(), task_uid(p)); 1417 uid = from_kuid_munged(current_user_ns(), task_uid(p));
1418 spin_unlock_irq(&p->sighand->siglock); 1418 spin_unlock_irq(&p->sighand->siglock);
1419 1419
1420 pid = task_pid_vnr(p); 1420 pid = task_pid_vnr(p);
1421 get_task_struct(p); 1421 get_task_struct(p);
1422 read_unlock(&tasklist_lock); 1422 read_unlock(&tasklist_lock);
1423 1423
1424 if (!wo->wo_info) { 1424 if (!wo->wo_info) {
1425 retval = wo->wo_rusage 1425 retval = wo->wo_rusage
1426 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; 1426 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1427 put_task_struct(p); 1427 put_task_struct(p);
1428 if (!retval && wo->wo_stat) 1428 if (!retval && wo->wo_stat)
1429 retval = put_user(0xffff, wo->wo_stat); 1429 retval = put_user(0xffff, wo->wo_stat);
1430 if (!retval) 1430 if (!retval)
1431 retval = pid; 1431 retval = pid;
1432 } else { 1432 } else {
1433 retval = wait_noreap_copyout(wo, p, pid, uid, 1433 retval = wait_noreap_copyout(wo, p, pid, uid,
1434 CLD_CONTINUED, SIGCONT); 1434 CLD_CONTINUED, SIGCONT);
1435 BUG_ON(retval == 0); 1435 BUG_ON(retval == 0);
1436 } 1436 }
1437 1437
1438 return retval; 1438 return retval;
1439 } 1439 }
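The 0xffff written to wo_stat above is the value WIFCONTINUED() tests for. A sketch of the continued-event path from user space (assumptions: standard waitpid()/kill(); not part of this diff):

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0) {
		raise(SIGSTOP);		/* stop until the parent resumes us */
		pause();		/* then just block */
		_exit(0);
	}

	waitpid(pid, &status, WUNTRACED);	/* consume the stop event first */
	kill(pid, SIGCONT);
	waitpid(pid, &status, WCONTINUED);	/* status becomes 0xffff */
	if (WIFCONTINUED(status))
		printf("child %d was continued\n", (int)pid);

	kill(pid, SIGKILL);
	waitpid(pid, &status, 0);		/* finally reap it */
	return 0;
}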
1440 1440
1441 /* 1441 /*
1442 * Consider @p for a wait by @parent. 1442 * Consider @p for a wait by @parent.
1443 * 1443 *
1444 * -ECHILD should be in ->notask_error before the first call. 1444 * -ECHILD should be in ->notask_error before the first call.
1445 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1445 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1446 * Returns zero if the search for a child should continue; 1446 * Returns zero if the search for a child should continue;
1447 * then ->notask_error is 0 if @p is an eligible child, 1447 * then ->notask_error is 0 if @p is an eligible child,
1448 * or another error from security_task_wait(), or still -ECHILD. 1448 * or another error from security_task_wait(), or still -ECHILD.
1449 */ 1449 */
1450 static int wait_consider_task(struct wait_opts *wo, int ptrace, 1450 static int wait_consider_task(struct wait_opts *wo, int ptrace,
1451 struct task_struct *p) 1451 struct task_struct *p)
1452 { 1452 {
1453 int ret = eligible_child(wo, p); 1453 int ret = eligible_child(wo, p);
1454 if (!ret) 1454 if (!ret)
1455 return ret; 1455 return ret;
1456 1456
1457 ret = security_task_wait(p); 1457 ret = security_task_wait(p);
1458 if (unlikely(ret < 0)) { 1458 if (unlikely(ret < 0)) {
1459 /* 1459 /*
1460 * If we have not yet seen any eligible child, 1460 * If we have not yet seen any eligible child,
1461 * then let this error code replace -ECHILD. 1461 * then let this error code replace -ECHILD.
1462 * A permission error will give the user a clue 1462 * A permission error will give the user a clue
1463 * to look for security policy problems, rather 1463 * to look for security policy problems, rather
1464 * than for mysterious wait bugs. 1464 * than for mysterious wait bugs.
1465 */ 1465 */
1466 if (wo->notask_error) 1466 if (wo->notask_error)
1467 wo->notask_error = ret; 1467 wo->notask_error = ret;
1468 return 0; 1468 return 0;
1469 } 1469 }
1470 1470
1471 /* dead body doesn't have much to contribute */ 1471 /* dead body doesn't have much to contribute */
1472 if (unlikely(p->exit_state == EXIT_DEAD)) { 1472 if (unlikely(p->exit_state == EXIT_DEAD)) {
1473 /* 1473 /*
1474 * But do not ignore this task until the tracer does 1474 * But do not ignore this task until the tracer does
1475 * wait_task_zombie()->do_notify_parent(). 1475 * wait_task_zombie()->do_notify_parent().
1476 */ 1476 */
1477 if (likely(!ptrace) && unlikely(ptrace_reparented(p))) 1477 if (likely(!ptrace) && unlikely(ptrace_reparented(p)))
1478 wo->notask_error = 0; 1478 wo->notask_error = 0;
1479 return 0; 1479 return 0;
1480 } 1480 }
1481 1481
1482 /* slay zombie? */ 1482 /* slay zombie? */
1483 if (p->exit_state == EXIT_ZOMBIE) { 1483 if (p->exit_state == EXIT_ZOMBIE) {
1484 /* 1484 /*
1485 * A zombie ptracee is only visible to its ptracer. 1485 * A zombie ptracee is only visible to its ptracer.
1486 * Notification and reaping will be cascaded to the real 1486 * Notification and reaping will be cascaded to the real
1487 * parent when the ptracer detaches. 1487 * parent when the ptracer detaches.
1488 */ 1488 */
1489 if (likely(!ptrace) && unlikely(p->ptrace)) { 1489 if (likely(!ptrace) && unlikely(p->ptrace)) {
1490 /* it will become visible, clear notask_error */ 1490 /* it will become visible, clear notask_error */
1491 wo->notask_error = 0; 1491 wo->notask_error = 0;
1492 return 0; 1492 return 0;
1493 } 1493 }
1494 1494
1495 /* we don't reap group leaders with subthreads */ 1495 /* we don't reap group leaders with subthreads */
1496 if (!delay_group_leader(p)) 1496 if (!delay_group_leader(p))
1497 return wait_task_zombie(wo, p); 1497 return wait_task_zombie(wo, p);
1498 1498
1499 /* 1499 /*
1500 * Allow access to stopped/continued state via zombie by 1500 * Allow access to stopped/continued state via zombie by
1501 * falling through. Clearing of notask_error is complex. 1501 * falling through. Clearing of notask_error is complex.
1502 * 1502 *
1503 * When !@ptrace: 1503 * When !@ptrace:
1504 * 1504 *
1505 * If WEXITED is set, notask_error should naturally be 1505 * If WEXITED is set, notask_error should naturally be
1506 * cleared. If not, subset of WSTOPPED|WCONTINUED is set, 1506 * cleared. If not, subset of WSTOPPED|WCONTINUED is set,
1507 * so, if there are live subthreads, there are events to 1507 * so, if there are live subthreads, there are events to
1508 * wait for. If all subthreads are dead, it's still safe 1508 * wait for. If all subthreads are dead, it's still safe
1509 * to clear - this function will be called again in a finite 1509 * to clear - this function will be called again in a finite
1510 * amount of time once all the subthreads are released and 1510 * amount of time once all the subthreads are released and
1511 * will then return without clearing. 1511 * will then return without clearing.
1512 * 1512 *
1513 * When @ptrace: 1513 * When @ptrace:
1514 * 1514 *
1515 * Stopped state is per-task and thus can't change once the 1515 * Stopped state is per-task and thus can't change once the
1516 * target task dies. Only continued and exited can happen. 1516 * target task dies. Only continued and exited can happen.
1517 * Clear notask_error if WCONTINUED | WEXITED. 1517 * Clear notask_error if WCONTINUED | WEXITED.
1518 */ 1518 */
1519 if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) 1519 if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
1520 wo->notask_error = 0; 1520 wo->notask_error = 0;
1521 } else { 1521 } else {
1522 /* 1522 /*
1523 * If @p is ptraced by a task in its real parent's group, 1523 * If @p is ptraced by a task in its real parent's group,
1524 * hide group stop/continued state when looking at @p as 1524 * hide group stop/continued state when looking at @p as
1525 * the real parent; otherwise, a single stop can be 1525 * the real parent; otherwise, a single stop can be
1526 * reported twice as group and ptrace stops. 1526 * reported twice as group and ptrace stops.
1527 * 1527 *
1528 * If a ptracer wants to distinguish the two events for its 1528 * If a ptracer wants to distinguish the two events for its
1529 * own children, it should create a separate process which 1529 * own children, it should create a separate process which
1530 * takes the role of real parent. 1530 * takes the role of real parent.
1531 */ 1531 */
1532 if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p)) 1532 if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p))
1533 return 0; 1533 return 0;
1534 1534
1535 /* 1535 /*
1536 * @p is alive and it's gonna stop, continue or exit, so 1536 * @p is alive and it's gonna stop, continue or exit, so
1537 * there always is something to wait for. 1537 * there always is something to wait for.
1538 */ 1538 */
1539 wo->notask_error = 0; 1539 wo->notask_error = 0;
1540 } 1540 }
1541 1541
1542 /* 1542 /*
1543 * Wait for stopped. Depending on @ptrace, different stopped state 1543 * Wait for stopped. Depending on @ptrace, different stopped state
1544 * is used and the two don't interact with each other. 1544 * is used and the two don't interact with each other.
1545 */ 1545 */
1546 ret = wait_task_stopped(wo, ptrace, p); 1546 ret = wait_task_stopped(wo, ptrace, p);
1547 if (ret) 1547 if (ret)
1548 return ret; 1548 return ret;
1549 1549
1550 /* 1550 /*
1551 * Wait for continued. There's only one continued state and the 1551 * Wait for continued. There's only one continued state and the
1552 * ptracer can consume it which can confuse the real parent. Don't 1552 * ptracer can consume it which can confuse the real parent. Don't
1553 * use WCONTINUED from ptracer. You don't need or want it. 1553 * use WCONTINUED from ptracer. You don't need or want it.
1554 */ 1554 */
1555 return wait_task_continued(wo, p); 1555 return wait_task_continued(wo, p);
1556 } 1556 }
1557 1557
1558 /* 1558 /*
1559 * Do the work of do_wait() for one thread in the group, @tsk. 1559 * Do the work of do_wait() for one thread in the group, @tsk.
1560 * 1560 *
1561 * -ECHILD should be in ->notask_error before the first call. 1561 * -ECHILD should be in ->notask_error before the first call.
1562 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1562 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1563 * Returns zero if the search for a child should continue; then 1563 * Returns zero if the search for a child should continue; then
1564 * ->notask_error is 0 if there were any eligible children, 1564 * ->notask_error is 0 if there were any eligible children,
1565 * or another error from security_task_wait(), or still -ECHILD. 1565 * or another error from security_task_wait(), or still -ECHILD.
1566 */ 1566 */
1567 static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) 1567 static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1568 { 1568 {
1569 struct task_struct *p; 1569 struct task_struct *p;
1570 1570
1571 list_for_each_entry(p, &tsk->children, sibling) { 1571 list_for_each_entry(p, &tsk->children, sibling) {
1572 int ret = wait_consider_task(wo, 0, p); 1572 int ret = wait_consider_task(wo, 0, p);
1573 if (ret) 1573 if (ret)
1574 return ret; 1574 return ret;
1575 } 1575 }
1576 1576
1577 return 0; 1577 return 0;
1578 } 1578 }
1579 1579
1580 static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) 1580 static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1581 { 1581 {
1582 struct task_struct *p; 1582 struct task_struct *p;
1583 1583
1584 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1584 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1585 int ret = wait_consider_task(wo, 1, p); 1585 int ret = wait_consider_task(wo, 1, p);
1586 if (ret) 1586 if (ret)
1587 return ret; 1587 return ret;
1588 } 1588 }
1589 1589
1590 return 0; 1590 return 0;
1591 } 1591 }
1592 1592
1593 static int child_wait_callback(wait_queue_t *wait, unsigned mode, 1593 static int child_wait_callback(wait_queue_t *wait, unsigned mode,
1594 int sync, void *key) 1594 int sync, void *key)
1595 { 1595 {
1596 struct wait_opts *wo = container_of(wait, struct wait_opts, 1596 struct wait_opts *wo = container_of(wait, struct wait_opts,
1597 child_wait); 1597 child_wait);
1598 struct task_struct *p = key; 1598 struct task_struct *p = key;
1599 1599
1600 if (!eligible_pid(wo, p)) 1600 if (!eligible_pid(wo, p))
1601 return 0; 1601 return 0;
1602 1602
1603 if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) 1603 if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
1604 return 0; 1604 return 0;
1605 1605
1606 return default_wake_function(wait, mode, sync, key); 1606 return default_wake_function(wait, mode, sync, key);
1607 } 1607 }
1608 1608
1609 void __wake_up_parent(struct task_struct *p, struct task_struct *parent) 1609 void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
1610 { 1610 {
1611 __wake_up_sync_key(&parent->signal->wait_chldexit, 1611 __wake_up_sync_key(&parent->signal->wait_chldexit,
1612 TASK_INTERRUPTIBLE, 1, p); 1612 TASK_INTERRUPTIBLE, 1, p);
1613 } 1613 }
1614 1614
1615 static long do_wait(struct wait_opts *wo) 1615 static long do_wait(struct wait_opts *wo)
1616 { 1616 {
1617 struct task_struct *tsk; 1617 struct task_struct *tsk;
1618 int retval; 1618 int retval;
1619 1619
1620 trace_sched_process_wait(wo->wo_pid); 1620 trace_sched_process_wait(wo->wo_pid);
1621 1621
1622 init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); 1622 init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
1623 wo->child_wait.private = current; 1623 wo->child_wait.private = current;
1624 add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); 1624 add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
1625 repeat: 1625 repeat:
1626 /* 1626 /*
1627 * If there is nothing that can match our criteria, just get out. 1627 * If there is nothing that can match our criteria, just get out.
1628 * We will clear ->notask_error to zero if we see any child that 1628 * We will clear ->notask_error to zero if we see any child that
1629 * might later match our criteria, even if we are not able to reap 1629 * might later match our criteria, even if we are not able to reap
1630 * it yet. 1630 * it yet.
1631 */ 1631 */
1632 wo->notask_error = -ECHILD; 1632 wo->notask_error = -ECHILD;
1633 if ((wo->wo_type < PIDTYPE_MAX) && 1633 if ((wo->wo_type < PIDTYPE_MAX) &&
1634 (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) 1634 (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
1635 goto notask; 1635 goto notask;
1636 1636
1637 set_current_state(TASK_INTERRUPTIBLE); 1637 set_current_state(TASK_INTERRUPTIBLE);
1638 read_lock(&tasklist_lock); 1638 read_lock(&tasklist_lock);
1639 tsk = current; 1639 tsk = current;
1640 do { 1640 do {
1641 retval = do_wait_thread(wo, tsk); 1641 retval = do_wait_thread(wo, tsk);
1642 if (retval) 1642 if (retval)
1643 goto end; 1643 goto end;
1644 1644
1645 retval = ptrace_do_wait(wo, tsk); 1645 retval = ptrace_do_wait(wo, tsk);
1646 if (retval) 1646 if (retval)
1647 goto end; 1647 goto end;
1648 1648
1649 if (wo->wo_flags & __WNOTHREAD) 1649 if (wo->wo_flags & __WNOTHREAD)
1650 break; 1650 break;
1651 } while_each_thread(current, tsk); 1651 } while_each_thread(current, tsk);
1652 read_unlock(&tasklist_lock); 1652 read_unlock(&tasklist_lock);
1653 1653
1654 notask: 1654 notask:
1655 retval = wo->notask_error; 1655 retval = wo->notask_error;
1656 if (!retval && !(wo->wo_flags & WNOHANG)) { 1656 if (!retval && !(wo->wo_flags & WNOHANG)) {
1657 retval = -ERESTARTSYS; 1657 retval = -ERESTARTSYS;
1658 if (!signal_pending(current)) { 1658 if (!signal_pending(current)) {
1659 schedule(); 1659 schedule();
1660 goto repeat; 1660 goto repeat;
1661 } 1661 }
1662 } 1662 }
1663 end: 1663 end:
1664 __set_current_state(TASK_RUNNING); 1664 __set_current_state(TASK_RUNNING);
1665 remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); 1665 remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
1666 return retval; 1666 return retval;
1667 } 1667 }
1668 1668
1669 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, 1669 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1670 infop, int, options, struct rusage __user *, ru) 1670 infop, int, options, struct rusage __user *, ru)
1671 { 1671 {
1672 struct wait_opts wo; 1672 struct wait_opts wo;
1673 struct pid *pid = NULL; 1673 struct pid *pid = NULL;
1674 enum pid_type type; 1674 enum pid_type type;
1675 long ret; 1675 long ret;
1676 1676
1677 if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) 1677 if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
1678 return -EINVAL; 1678 return -EINVAL;
1679 if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) 1679 if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
1680 return -EINVAL; 1680 return -EINVAL;
1681 1681
1682 switch (which) { 1682 switch (which) {
1683 case P_ALL: 1683 case P_ALL:
1684 type = PIDTYPE_MAX; 1684 type = PIDTYPE_MAX;
1685 break; 1685 break;
1686 case P_PID: 1686 case P_PID:
1687 type = PIDTYPE_PID; 1687 type = PIDTYPE_PID;
1688 if (upid <= 0) 1688 if (upid <= 0)
1689 return -EINVAL; 1689 return -EINVAL;
1690 break; 1690 break;
1691 case P_PGID: 1691 case P_PGID:
1692 type = PIDTYPE_PGID; 1692 type = PIDTYPE_PGID;
1693 if (upid <= 0) 1693 if (upid <= 0)
1694 return -EINVAL; 1694 return -EINVAL;
1695 break; 1695 break;
1696 default: 1696 default:
1697 return -EINVAL; 1697 return -EINVAL;
1698 } 1698 }
1699 1699
1700 if (type < PIDTYPE_MAX) 1700 if (type < PIDTYPE_MAX)
1701 pid = find_get_pid(upid); 1701 pid = find_get_pid(upid);
1702 1702
1703 wo.wo_type = type; 1703 wo.wo_type = type;
1704 wo.wo_pid = pid; 1704 wo.wo_pid = pid;
1705 wo.wo_flags = options; 1705 wo.wo_flags = options;
1706 wo.wo_info = infop; 1706 wo.wo_info = infop;
1707 wo.wo_stat = NULL; 1707 wo.wo_stat = NULL;
1708 wo.wo_rusage = ru; 1708 wo.wo_rusage = ru;
1709 ret = do_wait(&wo); 1709 ret = do_wait(&wo);
1710 1710
1711 if (ret > 0) { 1711 if (ret > 0) {
1712 ret = 0; 1712 ret = 0;
1713 } else if (infop) { 1713 } else if (infop) {
1714 /* 1714 /*
1715 * For a WNOHANG return, clear out all the fields 1715 * For a WNOHANG return, clear out all the fields
1716 * we would set so the user can easily tell the 1716 * we would set so the user can easily tell the
1717 * difference. 1717 * difference.
1718 */ 1718 */
1719 if (!ret) 1719 if (!ret)
1720 ret = put_user(0, &infop->si_signo); 1720 ret = put_user(0, &infop->si_signo);
1721 if (!ret) 1721 if (!ret)
1722 ret = put_user(0, &infop->si_errno); 1722 ret = put_user(0, &infop->si_errno);
1723 if (!ret) 1723 if (!ret)
1724 ret = put_user(0, &infop->si_code); 1724 ret = put_user(0, &infop->si_code);
1725 if (!ret) 1725 if (!ret)
1726 ret = put_user(0, &infop->si_pid); 1726 ret = put_user(0, &infop->si_pid);
1727 if (!ret) 1727 if (!ret)
1728 ret = put_user(0, &infop->si_uid); 1728 ret = put_user(0, &infop->si_uid);
1729 if (!ret) 1729 if (!ret)
1730 ret = put_user(0, &infop->si_status); 1730 ret = put_user(0, &infop->si_status);
1731 } 1731 }
1732 1732
1733 put_pid(pid); 1733 put_pid(pid);
1734 1734
1735 /* avoid REGPARM breakage on x86: */ 1735 /* avoid REGPARM breakage on x86: */
1736 asmlinkage_protect(5, ret, which, upid, infop, options, ru); 1736 asmlinkage_protect(5, ret, which, upid, infop, options, ru);
1737 return ret; 1737 return ret;
1738 } 1738 }
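The siginfo fields filled in by the zombie/stopped/continued helpers above come back through the glibc waitid() wrapper, and WNOWAIT maps to the "peek without reaping" branch of wait_task_zombie(). A small user-space sketch (not part of this diff):

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	siginfo_t info = { 0 };
	pid_t pid = fork();

	if (pid == 0)
		_exit(7);

	/* WNOWAIT: look at the zombie without reaping it */
	if (waitid(P_PID, pid, &info, WEXITED | WNOWAIT) == 0)
		printf("peek:   pid=%d code=%d status=%d\n",
		       (int)info.si_pid, info.si_code, info.si_status);

	/* second call actually reaps; si_code is CLD_EXITED, si_status is 7 */
	waitid(P_PID, pid, &info, WEXITED);
	printf("reaped: status=%d\n", info.si_status);
	return 0;
}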
1739 1739
1740 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, 1740 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1741 int, options, struct rusage __user *, ru) 1741 int, options, struct rusage __user *, ru)
1742 { 1742 {
1743 struct wait_opts wo; 1743 struct wait_opts wo;
1744 struct pid *pid = NULL; 1744 struct pid *pid = NULL;
1745 enum pid_type type; 1745 enum pid_type type;
1746 long ret; 1746 long ret;
1747 1747
1748 if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| 1748 if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
1749 __WNOTHREAD|__WCLONE|__WALL)) 1749 __WNOTHREAD|__WCLONE|__WALL))
1750 return -EINVAL; 1750 return -EINVAL;
1751 1751
1752 if (upid == -1) 1752 if (upid == -1)
1753 type = PIDTYPE_MAX; 1753 type = PIDTYPE_MAX;
1754 else if (upid < 0) { 1754 else if (upid < 0) {
1755 type = PIDTYPE_PGID; 1755 type = PIDTYPE_PGID;
1756 pid = find_get_pid(-upid); 1756 pid = find_get_pid(-upid);
1757 } else if (upid == 0) { 1757 } else if (upid == 0) {
1758 type = PIDTYPE_PGID; 1758 type = PIDTYPE_PGID;
1759 pid = get_task_pid(current, PIDTYPE_PGID); 1759 pid = get_task_pid(current, PIDTYPE_PGID);
1760 } else /* upid > 0 */ { 1760 } else /* upid > 0 */ {
1761 type = PIDTYPE_PID; 1761 type = PIDTYPE_PID;
1762 pid = find_get_pid(upid); 1762 pid = find_get_pid(upid);
1763 } 1763 }
1764 1764
1765 wo.wo_type = type; 1765 wo.wo_type = type;
1766 wo.wo_pid = pid; 1766 wo.wo_pid = pid;
1767 wo.wo_flags = options | WEXITED; 1767 wo.wo_flags = options | WEXITED;
1768 wo.wo_info = NULL; 1768 wo.wo_info = NULL;
1769 wo.wo_stat = stat_addr; 1769 wo.wo_stat = stat_addr;
1770 wo.wo_rusage = ru; 1770 wo.wo_rusage = ru;
1771 ret = do_wait(&wo); 1771 ret = do_wait(&wo);
1772 put_pid(pid); 1772 put_pid(pid);
1773 1773
1774 /* avoid REGPARM breakage on x86: */ 1774 /* avoid REGPARM breakage on x86: */
1775 asmlinkage_protect(4, ret, upid, stat_addr, options, ru); 1775 asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
1776 return ret; 1776 return ret;
1777 } 1777 }
1778 1778
1779 #ifdef __ARCH_WANT_SYS_WAITPID 1779 #ifdef __ARCH_WANT_SYS_WAITPID
1780 1780
1781 /* 1781 /*
1782 * sys_waitpid() remains for compatibility. waitpid() should be 1782 * sys_waitpid() remains for compatibility. waitpid() should be
1783 * implemented by calling sys_wait4() from libc.a. 1783 * implemented by calling sys_wait4() from libc.a.
1784 */ 1784 */
1785 SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) 1785 SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
1786 { 1786 {
1787 return sys_wait4(pid, stat_addr, options, NULL); 1787 return sys_wait4(pid, stat_addr, options, NULL);
1788 } 1788 }
1789 1789
1790 #endif 1790 #endif
1791 1791
kernel/sched/cputime.c
1 #include <linux/export.h> 1 #include <linux/export.h>
2 #include <linux/sched.h> 2 #include <linux/sched.h>
3 #include <linux/tsacct_kern.h> 3 #include <linux/tsacct_kern.h>
4 #include <linux/kernel_stat.h> 4 #include <linux/kernel_stat.h>
5 #include <linux/static_key.h> 5 #include <linux/static_key.h>
6 #include "sched.h" 6 #include "sched.h"
7 7
8 8
9 #ifdef CONFIG_IRQ_TIME_ACCOUNTING 9 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
10 10
11 /* 11 /*
12 * There are no locks covering percpu hardirq/softirq time. 12 * There are no locks covering percpu hardirq/softirq time.
13 * They are only modified in vtime_account, on corresponding CPU 13 * They are only modified in vtime_account, on corresponding CPU
14 * with interrupts disabled. So, writes are safe. 14 * with interrupts disabled. So, writes are safe.
15 * They are read and saved off onto struct rq in update_rq_clock(). 15 * They are read and saved off onto struct rq in update_rq_clock().
16 * This may result in other CPU reading this CPU's irq time and can 16 * This may result in other CPU reading this CPU's irq time and can
17 * race with irq/vtime_account on this CPU. We would either get old 17 * race with irq/vtime_account on this CPU. We would either get old
18 * or new value with a side effect of accounting a slice of irq time to wrong 18 * or new value with a side effect of accounting a slice of irq time to wrong
19 * task when irq is in progress while we read rq->clock. That is a worthy 19 * task when irq is in progress while we read rq->clock. That is a worthy
20 * compromise in place of having locks on each irq in account_system_time. 20 * compromise in place of having locks on each irq in account_system_time.
21 */ 21 */
22 DEFINE_PER_CPU(u64, cpu_hardirq_time); 22 DEFINE_PER_CPU(u64, cpu_hardirq_time);
23 DEFINE_PER_CPU(u64, cpu_softirq_time); 23 DEFINE_PER_CPU(u64, cpu_softirq_time);
24 24
25 static DEFINE_PER_CPU(u64, irq_start_time); 25 static DEFINE_PER_CPU(u64, irq_start_time);
26 static int sched_clock_irqtime; 26 static int sched_clock_irqtime;
27 27
28 void enable_sched_clock_irqtime(void) 28 void enable_sched_clock_irqtime(void)
29 { 29 {
30 sched_clock_irqtime = 1; 30 sched_clock_irqtime = 1;
31 } 31 }
32 32
33 void disable_sched_clock_irqtime(void) 33 void disable_sched_clock_irqtime(void)
34 { 34 {
35 sched_clock_irqtime = 0; 35 sched_clock_irqtime = 0;
36 } 36 }
37 37
38 #ifndef CONFIG_64BIT 38 #ifndef CONFIG_64BIT
39 DEFINE_PER_CPU(seqcount_t, irq_time_seq); 39 DEFINE_PER_CPU(seqcount_t, irq_time_seq);
40 #endif /* CONFIG_64BIT */ 40 #endif /* CONFIG_64BIT */
41 41
42 /* 42 /*
43 * Called before incrementing preempt_count on {soft,}irq_enter 43 * Called before incrementing preempt_count on {soft,}irq_enter
44 * and before decrementing preempt_count on {soft,}irq_exit. 44 * and before decrementing preempt_count on {soft,}irq_exit.
45 */ 45 */
46 void irqtime_account_irq(struct task_struct *curr) 46 void irqtime_account_irq(struct task_struct *curr)
47 { 47 {
48 unsigned long flags; 48 unsigned long flags;
49 s64 delta; 49 s64 delta;
50 int cpu; 50 int cpu;
51 51
52 if (!sched_clock_irqtime) 52 if (!sched_clock_irqtime)
53 return; 53 return;
54 54
55 local_irq_save(flags); 55 local_irq_save(flags);
56 56
57 cpu = smp_processor_id(); 57 cpu = smp_processor_id();
58 delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); 58 delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
59 __this_cpu_add(irq_start_time, delta); 59 __this_cpu_add(irq_start_time, delta);
60 60
61 irq_time_write_begin(); 61 irq_time_write_begin();
62 /* 62 /*
63 * We do not account for softirq time from ksoftirqd here. 63 * We do not account for softirq time from ksoftirqd here.
64 * We want to continue accounting softirq time to ksoftirqd thread 64 * We want to continue accounting softirq time to ksoftirqd thread
65 * in that case, so as not to confuse the scheduler with a special task 65 * in that case, so as not to confuse the scheduler with a special task
66 * that does not consume any time, but still wants to run. 66 * that does not consume any time, but still wants to run.
67 */ 67 */
68 if (hardirq_count()) 68 if (hardirq_count())
69 __this_cpu_add(cpu_hardirq_time, delta); 69 __this_cpu_add(cpu_hardirq_time, delta);
70 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) 70 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
71 __this_cpu_add(cpu_softirq_time, delta); 71 __this_cpu_add(cpu_softirq_time, delta);
72 72
73 irq_time_write_end(); 73 irq_time_write_end();
74 local_irq_restore(flags); 74 local_irq_restore(flags);
75 } 75 }
76 EXPORT_SYMBOL_GPL(irqtime_account_irq); 76 EXPORT_SYMBOL_GPL(irqtime_account_irq);
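On 32-bit kernels the per-CPU irq_time_seq declared below pairs with irq_time_write_begin()/irq_time_write_end() so readers can get a torn-free 64-bit view of cpu_hardirq_time/cpu_softirq_time. A sketch of such a reader (the kernel's own helper is not shown in this diff; the name here is illustrative):

#ifndef CONFIG_64BIT
/* Sketch only: read a CPU's accumulated irq time without tearing, retrying
 * if a writer ran concurrently. Assumes <linux/seqlock.h> and the per-CPU
 * variables declared in this file. */
static u64 irq_time_read_sketch(int cpu)
{
	unsigned int seq;
	u64 irq_time;

	do {
		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
		irq_time = per_cpu(cpu_softirq_time, cpu) +
			   per_cpu(cpu_hardirq_time, cpu);
	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));

	return irq_time;
}
#endif /* !CONFIG_64BIT */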
77 77
78 static int irqtime_account_hi_update(void) 78 static int irqtime_account_hi_update(void)
79 { 79 {
80 u64 *cpustat = kcpustat_this_cpu->cpustat; 80 u64 *cpustat = kcpustat_this_cpu->cpustat;
81 unsigned long flags; 81 unsigned long flags;
82 u64 latest_ns; 82 u64 latest_ns;
83 int ret = 0; 83 int ret = 0;
84 84
85 local_irq_save(flags); 85 local_irq_save(flags);
86 latest_ns = this_cpu_read(cpu_hardirq_time); 86 latest_ns = this_cpu_read(cpu_hardirq_time);
87 if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) 87 if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
88 ret = 1; 88 ret = 1;
89 local_irq_restore(flags); 89 local_irq_restore(flags);
90 return ret; 90 return ret;
91 } 91 }
92 92
93 static int irqtime_account_si_update(void) 93 static int irqtime_account_si_update(void)
94 { 94 {
95 u64 *cpustat = kcpustat_this_cpu->cpustat; 95 u64 *cpustat = kcpustat_this_cpu->cpustat;
96 unsigned long flags; 96 unsigned long flags;
97 u64 latest_ns; 97 u64 latest_ns;
98 int ret = 0; 98 int ret = 0;
99 99
100 local_irq_save(flags); 100 local_irq_save(flags);
101 latest_ns = this_cpu_read(cpu_softirq_time); 101 latest_ns = this_cpu_read(cpu_softirq_time);
102 if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) 102 if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
103 ret = 1; 103 ret = 1;
104 local_irq_restore(flags); 104 local_irq_restore(flags);
105 return ret; 105 return ret;
106 } 106 }
107 107
108 #else /* CONFIG_IRQ_TIME_ACCOUNTING */ 108 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
109 109
110 #define sched_clock_irqtime (0) 110 #define sched_clock_irqtime (0)
111 111
112 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ 112 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
113 113
114 static inline void task_group_account_field(struct task_struct *p, int index, 114 static inline void task_group_account_field(struct task_struct *p, int index,
115 u64 tmp) 115 u64 tmp)
116 { 116 {
117 #ifdef CONFIG_CGROUP_CPUACCT 117 #ifdef CONFIG_CGROUP_CPUACCT
118 struct kernel_cpustat *kcpustat; 118 struct kernel_cpustat *kcpustat;
119 struct cpuacct *ca; 119 struct cpuacct *ca;
120 #endif 120 #endif
121 /* 121 /*
122 * Since all updates are sure to touch the root cgroup, we 122 * Since all updates are sure to touch the root cgroup, we
123 * get ourselves ahead and touch it first. If the root cgroup 123 * get ourselves ahead and touch it first. If the root cgroup
124 * is the only cgroup, then nothing else should be necessary. 124 * is the only cgroup, then nothing else should be necessary.
125 * 125 *
126 */ 126 */
127 __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; 127 __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;
128 128
129 #ifdef CONFIG_CGROUP_CPUACCT 129 #ifdef CONFIG_CGROUP_CPUACCT
130 if (unlikely(!cpuacct_subsys.active)) 130 if (unlikely(!cpuacct_subsys.active))
131 return; 131 return;
132 132
133 rcu_read_lock(); 133 rcu_read_lock();
134 ca = task_ca(p); 134 ca = task_ca(p);
135 while (ca && (ca != &root_cpuacct)) { 135 while (ca && (ca != &root_cpuacct)) {
136 kcpustat = this_cpu_ptr(ca->cpustat); 136 kcpustat = this_cpu_ptr(ca->cpustat);
137 kcpustat->cpustat[index] += tmp; 137 kcpustat->cpustat[index] += tmp;
138 ca = parent_ca(ca); 138 ca = parent_ca(ca);
139 } 139 }
140 rcu_read_unlock(); 140 rcu_read_unlock();
141 #endif 141 #endif
142 } 142 }
143 143
144 /* 144 /*
145 * Account user cpu time to a process. 145 * Account user cpu time to a process.
146 * @p: the process that the cpu time gets accounted to 146 * @p: the process that the cpu time gets accounted to
147 * @cputime: the cpu time spent in user space since the last update 147 * @cputime: the cpu time spent in user space since the last update
148 * @cputime_scaled: cputime scaled by cpu frequency 148 * @cputime_scaled: cputime scaled by cpu frequency
149 */ 149 */
150 void account_user_time(struct task_struct *p, cputime_t cputime, 150 void account_user_time(struct task_struct *p, cputime_t cputime,
151 cputime_t cputime_scaled) 151 cputime_t cputime_scaled)
152 { 152 {
153 int index; 153 int index;
154 154
155 /* Add user time to process. */ 155 /* Add user time to process. */
156 p->utime += cputime; 156 p->utime += cputime;
157 p->utimescaled += cputime_scaled; 157 p->utimescaled += cputime_scaled;
158 account_group_user_time(p, cputime); 158 account_group_user_time(p, cputime);
159 159
160 index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; 160 index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
161 161
162 /* Add user time to cpustat. */ 162 /* Add user time to cpustat. */
163 task_group_account_field(p, index, (__force u64) cputime); 163 task_group_account_field(p, index, (__force u64) cputime);
164 164
165 /* Account for user time used */ 165 /* Account for user time used */
166 acct_update_integrals(p); 166 acct_update_integrals(p);
167 } 167 }
168 168
169 /* 169 /*
170 * Account guest cpu time to a process. 170 * Account guest cpu time to a process.
171 * @p: the process that the cpu time gets accounted to 171 * @p: the process that the cpu time gets accounted to
172 * @cputime: the cpu time spent in virtual machine since the last update 172 * @cputime: the cpu time spent in virtual machine since the last update
173 * @cputime_scaled: cputime scaled by cpu frequency 173 * @cputime_scaled: cputime scaled by cpu frequency
174 */ 174 */
175 static void account_guest_time(struct task_struct *p, cputime_t cputime, 175 static void account_guest_time(struct task_struct *p, cputime_t cputime,
176 cputime_t cputime_scaled) 176 cputime_t cputime_scaled)
177 { 177 {
178 u64 *cpustat = kcpustat_this_cpu->cpustat; 178 u64 *cpustat = kcpustat_this_cpu->cpustat;
179 179
180 /* Add guest time to process. */ 180 /* Add guest time to process. */
181 p->utime += cputime; 181 p->utime += cputime;
182 p->utimescaled += cputime_scaled; 182 p->utimescaled += cputime_scaled;
183 account_group_user_time(p, cputime); 183 account_group_user_time(p, cputime);
184 p->gtime += cputime; 184 p->gtime += cputime;
185 185
186 /* Add guest time to cpustat. */ 186 /* Add guest time to cpustat. */
187 if (TASK_NICE(p) > 0) { 187 if (TASK_NICE(p) > 0) {
188 cpustat[CPUTIME_NICE] += (__force u64) cputime; 188 cpustat[CPUTIME_NICE] += (__force u64) cputime;
189 cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; 189 cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
190 } else { 190 } else {
191 cpustat[CPUTIME_USER] += (__force u64) cputime; 191 cpustat[CPUTIME_USER] += (__force u64) cputime;
192 cpustat[CPUTIME_GUEST] += (__force u64) cputime; 192 cpustat[CPUTIME_GUEST] += (__force u64) cputime;
193 } 193 }
194 } 194 }
195 195
196 /* 196 /*
197 * Account system cpu time to a process and desired cpustat field 197 * Account system cpu time to a process and desired cpustat field
198 * @p: the process that the cpu time gets accounted to 198 * @p: the process that the cpu time gets accounted to
199 * @cputime: the cpu time spent in kernel space since the last update 199 * @cputime: the cpu time spent in kernel space since the last update
200 * @cputime_scaled: cputime scaled by cpu frequency 200 * @cputime_scaled: cputime scaled by cpu frequency
201 * @index: index of the cpustat field that has to be updated 201 * @index: index of the cpustat field that has to be updated
202 */ 202 */
203 static inline 203 static inline
204 void __account_system_time(struct task_struct *p, cputime_t cputime, 204 void __account_system_time(struct task_struct *p, cputime_t cputime,
205 cputime_t cputime_scaled, int index) 205 cputime_t cputime_scaled, int index)
206 { 206 {
207 /* Add system time to process. */ 207 /* Add system time to process. */
208 p->stime += cputime; 208 p->stime += cputime;
209 p->stimescaled += cputime_scaled; 209 p->stimescaled += cputime_scaled;
210 account_group_system_time(p, cputime); 210 account_group_system_time(p, cputime);
211 211
212 /* Add system time to cpustat. */ 212 /* Add system time to cpustat. */
213 task_group_account_field(p, index, (__force u64) cputime); 213 task_group_account_field(p, index, (__force u64) cputime);
214 214
215 /* Account for system time used */ 215 /* Account for system time used */
216 acct_update_integrals(p); 216 acct_update_integrals(p);
217 } 217 }
218 218
219 /* 219 /*
220 * Account system cpu time to a process. 220 * Account system cpu time to a process.
221 * @p: the process that the cpu time gets accounted to 221 * @p: the process that the cpu time gets accounted to
222 * @hardirq_offset: the offset to subtract from hardirq_count() 222 * @hardirq_offset: the offset to subtract from hardirq_count()
223 * @cputime: the cpu time spent in kernel space since the last update 223 * @cputime: the cpu time spent in kernel space since the last update
224 * @cputime_scaled: cputime scaled by cpu frequency 224 * @cputime_scaled: cputime scaled by cpu frequency
225 */ 225 */
226 void account_system_time(struct task_struct *p, int hardirq_offset, 226 void account_system_time(struct task_struct *p, int hardirq_offset,
227 cputime_t cputime, cputime_t cputime_scaled) 227 cputime_t cputime, cputime_t cputime_scaled)
228 { 228 {
229 int index; 229 int index;
230 230
231 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { 231 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
232 account_guest_time(p, cputime, cputime_scaled); 232 account_guest_time(p, cputime, cputime_scaled);
233 return; 233 return;
234 } 234 }
235 235
236 if (hardirq_count() - hardirq_offset) 236 if (hardirq_count() - hardirq_offset)
237 index = CPUTIME_IRQ; 237 index = CPUTIME_IRQ;
238 else if (in_serving_softirq()) 238 else if (in_serving_softirq())
239 index = CPUTIME_SOFTIRQ; 239 index = CPUTIME_SOFTIRQ;
240 else 240 else
241 index = CPUTIME_SYSTEM; 241 index = CPUTIME_SYSTEM;
242 242
243 __account_system_time(p, cputime, cputime_scaled, index); 243 __account_system_time(p, cputime, cputime_scaled, index);
244 } 244 }
245 245
246 /* 246 /*
247 * Account for involuntary wait time. 247 * Account for involuntary wait time.
248 * @cputime: the cpu time spent in involuntary wait 248 * @cputime: the cpu time spent in involuntary wait
249 */ 249 */
250 void account_steal_time(cputime_t cputime) 250 void account_steal_time(cputime_t cputime)
251 { 251 {
252 u64 *cpustat = kcpustat_this_cpu->cpustat; 252 u64 *cpustat = kcpustat_this_cpu->cpustat;
253 253
254 cpustat[CPUTIME_STEAL] += (__force u64) cputime; 254 cpustat[CPUTIME_STEAL] += (__force u64) cputime;
255 } 255 }
256 256
257 /* 257 /*
258 * Account for idle time. 258 * Account for idle time.
259 * @cputime: the cpu time spent in idle wait 259 * @cputime: the cpu time spent in idle wait
260 */ 260 */
261 void account_idle_time(cputime_t cputime) 261 void account_idle_time(cputime_t cputime)
262 { 262 {
263 u64 *cpustat = kcpustat_this_cpu->cpustat; 263 u64 *cpustat = kcpustat_this_cpu->cpustat;
264 struct rq *rq = this_rq(); 264 struct rq *rq = this_rq();
265 265
266 if (atomic_read(&rq->nr_iowait) > 0) 266 if (atomic_read(&rq->nr_iowait) > 0)
267 cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; 267 cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
268 else 268 else
269 cpustat[CPUTIME_IDLE] += (__force u64) cputime; 269 cpustat[CPUTIME_IDLE] += (__force u64) cputime;
270 } 270 }
271 271
272 static __always_inline bool steal_account_process_tick(void) 272 static __always_inline bool steal_account_process_tick(void)
273 { 273 {
274 #ifdef CONFIG_PARAVIRT 274 #ifdef CONFIG_PARAVIRT
275 if (static_key_false(&paravirt_steal_enabled)) { 275 if (static_key_false(&paravirt_steal_enabled)) {
276 u64 steal, st = 0; 276 u64 steal, st = 0;
277 277
278 steal = paravirt_steal_clock(smp_processor_id()); 278 steal = paravirt_steal_clock(smp_processor_id());
279 steal -= this_rq()->prev_steal_time; 279 steal -= this_rq()->prev_steal_time;
280 280
281 st = steal_ticks(steal); 281 st = steal_ticks(steal);
282 this_rq()->prev_steal_time += st * TICK_NSEC; 282 this_rq()->prev_steal_time += st * TICK_NSEC;
283 283
284 account_steal_time(st); 284 account_steal_time(st);
285 return st; 285 return st;
286 } 286 }
287 #endif 287 #endif
288 return false; 288 return false;
289 } 289 }
290 290
291 /* 291 /*
292 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live 292 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
293 * tasks (sum on group iteration) belonging to @tsk's group. 293 * tasks (sum on group iteration) belonging to @tsk's group.
294 */ 294 */
295 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) 295 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
296 { 296 {
297 struct signal_struct *sig = tsk->signal; 297 struct signal_struct *sig = tsk->signal;
298 struct task_struct *t; 298 struct task_struct *t;
299 299
300 times->utime = sig->utime; 300 times->utime = sig->utime;
301 times->stime = sig->stime; 301 times->stime = sig->stime;
302 times->sum_exec_runtime = sig->sum_sched_runtime; 302 times->sum_exec_runtime = sig->sum_sched_runtime;
303 303
304 rcu_read_lock(); 304 rcu_read_lock();
305 /* make sure we can trust tsk->thread_group list */ 305 /* make sure we can trust tsk->thread_group list */
306 if (!likely(pid_alive(tsk))) 306 if (!likely(pid_alive(tsk)))
307 goto out; 307 goto out;
308 308
309 t = tsk; 309 t = tsk;
310 do { 310 do {
311 times->utime += t->utime; 311 times->utime += t->utime;
312 times->stime += t->stime; 312 times->stime += t->stime;
313 times->sum_exec_runtime += task_sched_runtime(t); 313 times->sum_exec_runtime += task_sched_runtime(t);
314 } while_each_thread(tsk, t); 314 } while_each_thread(tsk, t);
315 out: 315 out:
316 rcu_read_unlock(); 316 rcu_read_unlock();
317 } 317 }
318 318
319 #ifndef CONFIG_VIRT_CPU_ACCOUNTING 319 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
320 320
321 #ifdef CONFIG_IRQ_TIME_ACCOUNTING 321 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
322 /* 322 /*
323 * Account a tick to a process and cpustat 323 * Account a tick to a process and cpustat
324 * @p: the process that the cpu time gets accounted to 324 * @p: the process that the cpu time gets accounted to
325 * @user_tick: whether the tick is from userspace 325 * @user_tick: whether the tick is from userspace
326 * @rq: the pointer to rq 326 * @rq: the pointer to rq
327 * 327 *
328 * Tick demultiplexing follows the order 328 * Tick demultiplexing follows the order
329 * - pending hardirq update 329 * - pending hardirq update
330 * - pending softirq update 330 * - pending softirq update
331 * - user_time 331 * - user_time
332 * - idle_time 332 * - idle_time
333 * - system time 333 * - system time
334 * - check for guest_time 334 * - check for guest_time
335 * - else account as system_time 335 * - else account as system_time
336 * 336 *
337 * The check for hardirq is done for both system and user time as there is 337 * The check for hardirq is done for both system and user time as there is
338 * no timer going off while we are on hardirq and hence we may never get an 338 * no timer going off while we are on hardirq and hence we may never get an
339 * opportunity to update it solely in system time. 339 * opportunity to update it solely in system time.
340 * p->stime and friends are updated only on system time, not on irq or 340 * p->stime and friends are updated only on system time, not on irq or
341 * softirq, as those no longer count in task exec_runtime. 341 * softirq, as those no longer count in task exec_runtime.
342 */ 342 */
343 static void irqtime_account_process_tick(struct task_struct *p, int user_tick, 343 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
344 struct rq *rq) 344 struct rq *rq)
345 { 345 {
346 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 346 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
347 u64 *cpustat = kcpustat_this_cpu->cpustat; 347 u64 *cpustat = kcpustat_this_cpu->cpustat;
348 348
349 if (steal_account_process_tick()) 349 if (steal_account_process_tick())
350 return; 350 return;
351 351
352 if (irqtime_account_hi_update()) { 352 if (irqtime_account_hi_update()) {
353 cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; 353 cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
354 } else if (irqtime_account_si_update()) { 354 } else if (irqtime_account_si_update()) {
355 cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; 355 cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
356 } else if (this_cpu_ksoftirqd() == p) { 356 } else if (this_cpu_ksoftirqd() == p) {
357 /* 357 /*
358 * ksoftirqd time does not get accounted in cpu_softirq_time. 358 * ksoftirqd time does not get accounted in cpu_softirq_time.
359 * So, we have to handle it separately here. 359 * So, we have to handle it separately here.
360 * Also, p->stime needs to be updated for ksoftirqd. 360 * Also, p->stime needs to be updated for ksoftirqd.
361 */ 361 */
362 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 362 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
363 CPUTIME_SOFTIRQ); 363 CPUTIME_SOFTIRQ);
364 } else if (user_tick) { 364 } else if (user_tick) {
365 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 365 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
366 } else if (p == rq->idle) { 366 } else if (p == rq->idle) {
367 account_idle_time(cputime_one_jiffy); 367 account_idle_time(cputime_one_jiffy);
368 } else if (p->flags & PF_VCPU) { /* System time or guest time */ 368 } else if (p->flags & PF_VCPU) { /* System time or guest time */
369 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); 369 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
370 } else { 370 } else {
371 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 371 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
372 CPUTIME_SYSTEM); 372 CPUTIME_SYSTEM);
373 } 373 }
374 } 374 }
375 375
376 static void irqtime_account_idle_ticks(int ticks) 376 static void irqtime_account_idle_ticks(int ticks)
377 { 377 {
378 int i; 378 int i;
379 struct rq *rq = this_rq(); 379 struct rq *rq = this_rq();
380 380
381 for (i = 0; i < ticks; i++) 381 for (i = 0; i < ticks; i++)
382 irqtime_account_process_tick(current, 0, rq); 382 irqtime_account_process_tick(current, 0, rq);
383 } 383 }
384 #else /* CONFIG_IRQ_TIME_ACCOUNTING */ 384 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
385 static void irqtime_account_idle_ticks(int ticks) {} 385 static void irqtime_account_idle_ticks(int ticks) {}
386 static void irqtime_account_process_tick(struct task_struct *p, int user_tick, 386 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
387 struct rq *rq) {} 387 struct rq *rq) {}
388 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 388 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
389 389
390 /* 390 /*
391 * Account a single tick of cpu time. 391 * Account a single tick of cpu time.
392 * @p: the process that the cpu time gets accounted to 392 * @p: the process that the cpu time gets accounted to
393 * @user_tick: indicates if the tick is a user or a system tick 393 * @user_tick: indicates if the tick is a user or a system tick
394 */ 394 */
395 void account_process_tick(struct task_struct *p, int user_tick) 395 void account_process_tick(struct task_struct *p, int user_tick)
396 { 396 {
397 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 397 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
398 struct rq *rq = this_rq(); 398 struct rq *rq = this_rq();
399 399
400 if (sched_clock_irqtime) { 400 if (sched_clock_irqtime) {
401 irqtime_account_process_tick(p, user_tick, rq); 401 irqtime_account_process_tick(p, user_tick, rq);
402 return; 402 return;
403 } 403 }
404 404
405 if (steal_account_process_tick()) 405 if (steal_account_process_tick())
406 return; 406 return;
407 407
408 if (user_tick) 408 if (user_tick)
409 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 409 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
410 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) 410 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
411 account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, 411 account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
412 one_jiffy_scaled); 412 one_jiffy_scaled);
413 else 413 else
414 account_idle_time(cputime_one_jiffy); 414 account_idle_time(cputime_one_jiffy);
415 } 415 }
416 416
417 /* 417 /*
418 * Account multiple ticks of steal time. 418 * Account multiple ticks of steal time.
419 * @p: the process from which the cpu time has been stolen 419 * @p: the process from which the cpu time has been stolen
420 * @ticks: number of stolen ticks 420 * @ticks: number of stolen ticks
421 */ 421 */
422 void account_steal_ticks(unsigned long ticks) 422 void account_steal_ticks(unsigned long ticks)
423 { 423 {
424 account_steal_time(jiffies_to_cputime(ticks)); 424 account_steal_time(jiffies_to_cputime(ticks));
425 } 425 }
426 426
427 /* 427 /*
428 * Account multiple ticks of idle time. 428 * Account multiple ticks of idle time.
429 * @ticks: number of idle ticks 429 * @ticks: number of idle ticks
430 */ 430 */
431 void account_idle_ticks(unsigned long ticks) 431 void account_idle_ticks(unsigned long ticks)
432 { 432 {
433 433
434 if (sched_clock_irqtime) { 434 if (sched_clock_irqtime) {
435 irqtime_account_idle_ticks(ticks); 435 irqtime_account_idle_ticks(ticks);
436 return; 436 return;
437 } 437 }
438 438
439 account_idle_time(jiffies_to_cputime(ticks)); 439 account_idle_time(jiffies_to_cputime(ticks));
440 } 440 }
441 441
442 #endif 442 #endif
443 443
444 /* 444 /*
445 * Use precise platform statistics if available: 445 * Use precise platform statistics if available:
446 */ 446 */
447 #ifdef CONFIG_VIRT_CPU_ACCOUNTING 447 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
448 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) 448 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
449 { 449 {
450 *ut = p->utime; 450 *ut = p->utime;
451 *st = p->stime; 451 *st = p->stime;
452 } 452 }
453 453
454 void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) 454 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
455 { 455 {
456 struct task_cputime cputime; 456 struct task_cputime cputime;
457 457
458 thread_group_cputime(p, &cputime); 458 thread_group_cputime(p, &cputime);
459 459
460 *ut = cputime.utime; 460 *ut = cputime.utime;
461 *st = cputime.stime; 461 *st = cputime.stime;
462 } 462 }
463 463
464 void vtime_account_system(struct task_struct *tsk) 464 void vtime_account_system(struct task_struct *tsk)
465 { 465 {
466 unsigned long flags; 466 unsigned long flags;
467 467
468 local_irq_save(flags); 468 local_irq_save(flags);
469 __vtime_account_system(tsk); 469 __vtime_account_system(tsk);
470 local_irq_restore(flags); 470 local_irq_restore(flags);
471 } 471 }
472 EXPORT_SYMBOL_GPL(vtime_account_system); 472 EXPORT_SYMBOL_GPL(vtime_account_system);
473 473
474 /* 474 /*
475 * Archs that account the whole time spent in the idle task 475 * Archs that account the whole time spent in the idle task
476 * (outside irq) as idle time can rely on this and just implement 476 * (outside irq) as idle time can rely on this and just implement
477 * __vtime_account_system() and __vtime_account_idle(). Archs that 477 * __vtime_account_system() and __vtime_account_idle(). Archs that
478 * give a different meaning to idle time (s390 only includes the 478 * give a different meaning to idle time (s390 only includes the
479 * time spent by the CPU when it's in low power mode) must override 479 * time spent by the CPU when it's in low power mode) must override
480 * vtime_account(). 480 * vtime_account().
481 */ 481 */
482 #ifndef __ARCH_HAS_VTIME_ACCOUNT 482 #ifndef __ARCH_HAS_VTIME_ACCOUNT
483 void vtime_account(struct task_struct *tsk) 483 void vtime_account(struct task_struct *tsk)
484 { 484 {
485 unsigned long flags; 485 unsigned long flags;
486 486
487 local_irq_save(flags); 487 local_irq_save(flags);
488 488
489 if (in_interrupt() || !is_idle_task(tsk)) 489 if (in_interrupt() || !is_idle_task(tsk))
490 __vtime_account_system(tsk); 490 __vtime_account_system(tsk);
491 else 491 else
492 __vtime_account_idle(tsk); 492 __vtime_account_idle(tsk);
493 493
494 local_irq_restore(flags); 494 local_irq_restore(flags);
495 } 495 }
496 EXPORT_SYMBOL_GPL(vtime_account); 496 EXPORT_SYMBOL_GPL(vtime_account);
497 #endif /* __ARCH_HAS_VTIME_ACCOUNT */ 497 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
498 498
499 #else 499 #else
500 500
501 #ifndef nsecs_to_cputime 501 #ifndef nsecs_to_cputime
502 # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) 502 # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
503 #endif 503 #endif
504 504
505 static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) 505 static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
506 { 506 {
507 u64 temp = (__force u64) rtime; 507 u64 temp = (__force u64) rtime;
508 508
509 temp *= (__force u64) utime; 509 temp *= (__force u64) utime;
510 510
511 if (sizeof(cputime_t) == 4) 511 if (sizeof(cputime_t) == 4)
512 temp = div_u64(temp, (__force u32) total); 512 temp = div_u64(temp, (__force u32) total);
513 else 513 else
514 temp = div64_u64(temp, (__force u64) total); 514 temp = div64_u64(temp, (__force u64) total);
515 515
516 return (__force cputime_t) temp; 516 return (__force cputime_t) temp;
517 } 517 }
518 518
519 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) 519 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
520 { 520 {
521 cputime_t rtime, utime = p->utime, total = utime + p->stime; 521 cputime_t rtime, utime = p->utime, total = utime + p->stime;
522 522
523 /* 523 /*
524 * Use CFS's precise accounting: 524 * Use CFS's precise accounting:
525 */ 525 */
526 rtime = nsecs_to_cputime(p->se.sum_exec_runtime); 526 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
527 527
528 if (total) 528 if (total)
529 utime = scale_utime(utime, rtime, total); 529 utime = scale_utime(utime, rtime, total);
530 else 530 else
531 utime = rtime; 531 utime = rtime;
532 532
533 /* 533 /*
534 * Compare with previous values, to keep monotonicity: 534 * Compare with previous values, to keep monotonicity:
535 */ 535 */
536 p->prev_utime = max(p->prev_utime, utime); 536 p->prev_utime = max(p->prev_utime, utime);
537 p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); 537 p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
538 538
539 *ut = p->prev_utime; 539 *ut = p->prev_utime;
540 *st = p->prev_stime; 540 *st = p->prev_stime;
541 } 541 }
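
To see what the adjustment above does numerically, here is a minimal, self-contained C sketch of the same scale-then-clamp arithmetic with hypothetical values (plain userspace code, not part of this change):

#include <stdio.h>

typedef unsigned long long u64;

/* Same idea as scale_utime(): utime * rtime / total. */
static u64 scale(u64 utime, u64 rtime, u64 total)
{
	return total ? (rtime * utime) / total : rtime;
}

int main(void)
{
	/* Hypothetical tick-sampled times and precise CFS runtime (same unit). */
	u64 utime = 6, stime = 4, rtime = 12;
	u64 prev_utime = 0, prev_stime = 0;
	u64 ut = scale(utime, rtime, utime + stime);	/* 12 * 6 / 10 = 7 */

	/* Clamp against previously reported values to keep them monotonic. */
	prev_utime = ut > prev_utime ? ut : prev_utime;
	prev_stime = (rtime - prev_utime) > prev_stime ? rtime - prev_utime : prev_stime;

	printf("ut=%llu st=%llu\n", prev_utime, prev_stime);	/* prints ut=7 st=5 */
	return 0;
}

Because each value is taken as a max() against what was reported before, repeated calls can only move the reported utime and stime forward.
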
542 542
543 /* 543 /*
544 * Must be called with siglock held. 544 * Must be called with siglock held.
545 */ 545 */
546 void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) 546 void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
547 { 547 {
548 struct signal_struct *sig = p->signal; 548 struct signal_struct *sig = p->signal;
549 struct task_cputime cputime; 549 struct task_cputime cputime;
550 cputime_t rtime, utime, total; 550 cputime_t rtime, utime, total;
551 551
552 thread_group_cputime(p, &cputime); 552 thread_group_cputime(p, &cputime);
553 553
554 total = cputime.utime + cputime.stime; 554 total = cputime.utime + cputime.stime;
555 rtime = nsecs_to_cputime(cputime.sum_exec_runtime); 555 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
556 556
557 if (total) 557 if (total)
558 utime = scale_utime(cputime.utime, rtime, total); 558 utime = scale_utime(cputime.utime, rtime, total);
559 else 559 else
560 utime = rtime; 560 utime = rtime;
561 561
562 sig->prev_utime = max(sig->prev_utime, utime); 562 sig->prev_utime = max(sig->prev_utime, utime);
563 sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); 563 sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
564 564
565 *ut = sig->prev_utime; 565 *ut = sig->prev_utime;
566 *st = sig->prev_stime; 566 *st = sig->prev_stime;
567 } 567 }
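
As the comment notes, the group variant relies on the caller holding siglock around the sig->prev_[us]time update. A hedged sketch of how a caller might do that, using lock_task_sighand() (illustrative kernel-context code, not part of this patch):

#include <linux/sched.h>

/* Illustrative caller only; the function name is invented for the example. */
static void report_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	unsigned long flags;

	*ut = *st = 0;
	if (lock_task_sighand(p, &flags)) {
		thread_group_cputime_adjusted(p, ut, st);
		unlock_task_sighand(p, &flags);
	}
}
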
568 #endif 568 #endif
569 569
1 /* 1 /*
2 * linux/kernel/sys.c 2 * linux/kernel/sys.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 #include <linux/export.h> 7 #include <linux/export.h>
8 #include <linux/mm.h> 8 #include <linux/mm.h>
9 #include <linux/utsname.h> 9 #include <linux/utsname.h>
10 #include <linux/mman.h> 10 #include <linux/mman.h>
11 #include <linux/reboot.h> 11 #include <linux/reboot.h>
12 #include <linux/prctl.h> 12 #include <linux/prctl.h>
13 #include <linux/highuid.h> 13 #include <linux/highuid.h>
14 #include <linux/fs.h> 14 #include <linux/fs.h>
15 #include <linux/kmod.h> 15 #include <linux/kmod.h>
16 #include <linux/perf_event.h> 16 #include <linux/perf_event.h>
17 #include <linux/resource.h> 17 #include <linux/resource.h>
18 #include <linux/kernel.h> 18 #include <linux/kernel.h>
19 #include <linux/kexec.h> 19 #include <linux/kexec.h>
20 #include <linux/workqueue.h> 20 #include <linux/workqueue.h>
21 #include <linux/capability.h> 21 #include <linux/capability.h>
22 #include <linux/device.h> 22 #include <linux/device.h>
23 #include <linux/key.h> 23 #include <linux/key.h>
24 #include <linux/times.h> 24 #include <linux/times.h>
25 #include <linux/posix-timers.h> 25 #include <linux/posix-timers.h>
26 #include <linux/security.h> 26 #include <linux/security.h>
27 #include <linux/dcookies.h> 27 #include <linux/dcookies.h>
28 #include <linux/suspend.h> 28 #include <linux/suspend.h>
29 #include <linux/tty.h> 29 #include <linux/tty.h>
30 #include <linux/signal.h> 30 #include <linux/signal.h>
31 #include <linux/cn_proc.h> 31 #include <linux/cn_proc.h>
32 #include <linux/getcpu.h> 32 #include <linux/getcpu.h>
33 #include <linux/task_io_accounting_ops.h> 33 #include <linux/task_io_accounting_ops.h>
34 #include <linux/seccomp.h> 34 #include <linux/seccomp.h>
35 #include <linux/cpu.h> 35 #include <linux/cpu.h>
36 #include <linux/personality.h> 36 #include <linux/personality.h>
37 #include <linux/ptrace.h> 37 #include <linux/ptrace.h>
38 #include <linux/fs_struct.h> 38 #include <linux/fs_struct.h>
39 #include <linux/file.h> 39 #include <linux/file.h>
40 #include <linux/mount.h> 40 #include <linux/mount.h>
41 #include <linux/gfp.h> 41 #include <linux/gfp.h>
42 #include <linux/syscore_ops.h> 42 #include <linux/syscore_ops.h>
43 #include <linux/version.h> 43 #include <linux/version.h>
44 #include <linux/ctype.h> 44 #include <linux/ctype.h>
45 45
46 #include <linux/compat.h> 46 #include <linux/compat.h>
47 #include <linux/syscalls.h> 47 #include <linux/syscalls.h>
48 #include <linux/kprobes.h> 48 #include <linux/kprobes.h>
49 #include <linux/user_namespace.h> 49 #include <linux/user_namespace.h>
50 50
51 #include <linux/kmsg_dump.h> 51 #include <linux/kmsg_dump.h>
52 /* Move somewhere else to avoid recompiling? */ 52 /* Move somewhere else to avoid recompiling? */
53 #include <generated/utsrelease.h> 53 #include <generated/utsrelease.h>
54 54
55 #include <asm/uaccess.h> 55 #include <asm/uaccess.h>
56 #include <asm/io.h> 56 #include <asm/io.h>
57 #include <asm/unistd.h> 57 #include <asm/unistd.h>
58 58
59 #ifndef SET_UNALIGN_CTL 59 #ifndef SET_UNALIGN_CTL
60 # define SET_UNALIGN_CTL(a,b) (-EINVAL) 60 # define SET_UNALIGN_CTL(a,b) (-EINVAL)
61 #endif 61 #endif
62 #ifndef GET_UNALIGN_CTL 62 #ifndef GET_UNALIGN_CTL
63 # define GET_UNALIGN_CTL(a,b) (-EINVAL) 63 # define GET_UNALIGN_CTL(a,b) (-EINVAL)
64 #endif 64 #endif
65 #ifndef SET_FPEMU_CTL 65 #ifndef SET_FPEMU_CTL
66 # define SET_FPEMU_CTL(a,b) (-EINVAL) 66 # define SET_FPEMU_CTL(a,b) (-EINVAL)
67 #endif 67 #endif
68 #ifndef GET_FPEMU_CTL 68 #ifndef GET_FPEMU_CTL
69 # define GET_FPEMU_CTL(a,b) (-EINVAL) 69 # define GET_FPEMU_CTL(a,b) (-EINVAL)
70 #endif 70 #endif
71 #ifndef SET_FPEXC_CTL 71 #ifndef SET_FPEXC_CTL
72 # define SET_FPEXC_CTL(a,b) (-EINVAL) 72 # define SET_FPEXC_CTL(a,b) (-EINVAL)
73 #endif 73 #endif
74 #ifndef GET_FPEXC_CTL 74 #ifndef GET_FPEXC_CTL
75 # define GET_FPEXC_CTL(a,b) (-EINVAL) 75 # define GET_FPEXC_CTL(a,b) (-EINVAL)
76 #endif 76 #endif
77 #ifndef GET_ENDIAN 77 #ifndef GET_ENDIAN
78 # define GET_ENDIAN(a,b) (-EINVAL) 78 # define GET_ENDIAN(a,b) (-EINVAL)
79 #endif 79 #endif
80 #ifndef SET_ENDIAN 80 #ifndef SET_ENDIAN
81 # define SET_ENDIAN(a,b) (-EINVAL) 81 # define SET_ENDIAN(a,b) (-EINVAL)
82 #endif 82 #endif
83 #ifndef GET_TSC_CTL 83 #ifndef GET_TSC_CTL
84 # define GET_TSC_CTL(a) (-EINVAL) 84 # define GET_TSC_CTL(a) (-EINVAL)
85 #endif 85 #endif
86 #ifndef SET_TSC_CTL 86 #ifndef SET_TSC_CTL
87 # define SET_TSC_CTL(a) (-EINVAL) 87 # define SET_TSC_CTL(a) (-EINVAL)
88 #endif 88 #endif
89 89
90 /* 90 /*
91 * this is where the system-wide overflow UID and GID are defined, for 91 * this is where the system-wide overflow UID and GID are defined, for
92 * architectures that now have 32-bit UID/GID but didn't in the past 92 * architectures that now have 32-bit UID/GID but didn't in the past
93 */ 93 */
94 94
95 int overflowuid = DEFAULT_OVERFLOWUID; 95 int overflowuid = DEFAULT_OVERFLOWUID;
96 int overflowgid = DEFAULT_OVERFLOWGID; 96 int overflowgid = DEFAULT_OVERFLOWGID;
97 97
98 EXPORT_SYMBOL(overflowuid); 98 EXPORT_SYMBOL(overflowuid);
99 EXPORT_SYMBOL(overflowgid); 99 EXPORT_SYMBOL(overflowgid);
100 100
101 /* 101 /*
102 * the same as above, but for filesystems which can only store a 16-bit 102 * the same as above, but for filesystems which can only store a 16-bit
103 * UID and GID. As such, this is needed on all architectures 103 * UID and GID. As such, this is needed on all architectures
104 */ 104 */
105 105
106 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; 106 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
107 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; 107 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
108 108
109 EXPORT_SYMBOL(fs_overflowuid); 109 EXPORT_SYMBOL(fs_overflowuid);
110 EXPORT_SYMBOL(fs_overflowgid); 110 EXPORT_SYMBOL(fs_overflowgid);
111 111
112 /* 112 /*
113 * this indicates whether you can reboot with ctrl-alt-del: the default is yes 113 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
114 */ 114 */
115 115
116 int C_A_D = 1; 116 int C_A_D = 1;
117 struct pid *cad_pid; 117 struct pid *cad_pid;
118 EXPORT_SYMBOL(cad_pid); 118 EXPORT_SYMBOL(cad_pid);
119 119
120 /* 120 /*
121 * If set, this is used for preparing the system to power off. 121 * If set, this is used for preparing the system to power off.
122 */ 122 */
123 123
124 void (*pm_power_off_prepare)(void); 124 void (*pm_power_off_prepare)(void);
125 125
126 /* 126 /*
127 * Returns true if current's euid is the same as p's uid or euid, 127 * Returns true if current's euid is the same as p's uid or euid,
128 * or if current has CAP_SYS_NICE in p's user_ns. 128 * or if current has CAP_SYS_NICE in p's user_ns.
129 * 129 *
130 * Called with rcu_read_lock, creds are safe 130 * Called with rcu_read_lock, creds are safe
131 */ 131 */
132 static bool set_one_prio_perm(struct task_struct *p) 132 static bool set_one_prio_perm(struct task_struct *p)
133 { 133 {
134 const struct cred *cred = current_cred(), *pcred = __task_cred(p); 134 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
135 135
136 if (uid_eq(pcred->uid, cred->euid) || 136 if (uid_eq(pcred->uid, cred->euid) ||
137 uid_eq(pcred->euid, cred->euid)) 137 uid_eq(pcred->euid, cred->euid))
138 return true; 138 return true;
139 if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) 139 if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
140 return true; 140 return true;
141 return false; 141 return false;
142 } 142 }
143 143
144 /* 144 /*
145 * set the priority of a task 145 * set the priority of a task
146 * - the caller must hold the RCU read lock 146 * - the caller must hold the RCU read lock
147 */ 147 */
148 static int set_one_prio(struct task_struct *p, int niceval, int error) 148 static int set_one_prio(struct task_struct *p, int niceval, int error)
149 { 149 {
150 int no_nice; 150 int no_nice;
151 151
152 if (!set_one_prio_perm(p)) { 152 if (!set_one_prio_perm(p)) {
153 error = -EPERM; 153 error = -EPERM;
154 goto out; 154 goto out;
155 } 155 }
156 if (niceval < task_nice(p) && !can_nice(p, niceval)) { 156 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
157 error = -EACCES; 157 error = -EACCES;
158 goto out; 158 goto out;
159 } 159 }
160 no_nice = security_task_setnice(p, niceval); 160 no_nice = security_task_setnice(p, niceval);
161 if (no_nice) { 161 if (no_nice) {
162 error = no_nice; 162 error = no_nice;
163 goto out; 163 goto out;
164 } 164 }
165 if (error == -ESRCH) 165 if (error == -ESRCH)
166 error = 0; 166 error = 0;
167 set_user_nice(p, niceval); 167 set_user_nice(p, niceval);
168 out: 168 out:
169 return error; 169 return error;
170 } 170 }
171 171
172 SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) 172 SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
173 { 173 {
174 struct task_struct *g, *p; 174 struct task_struct *g, *p;
175 struct user_struct *user; 175 struct user_struct *user;
176 const struct cred *cred = current_cred(); 176 const struct cred *cred = current_cred();
177 int error = -EINVAL; 177 int error = -EINVAL;
178 struct pid *pgrp; 178 struct pid *pgrp;
179 kuid_t uid; 179 kuid_t uid;
180 180
181 if (which > PRIO_USER || which < PRIO_PROCESS) 181 if (which > PRIO_USER || which < PRIO_PROCESS)
182 goto out; 182 goto out;
183 183
184 /* normalize: avoid signed division (rounding problems) */ 184 /* normalize: avoid signed division (rounding problems) */
185 error = -ESRCH; 185 error = -ESRCH;
186 if (niceval < -20) 186 if (niceval < -20)
187 niceval = -20; 187 niceval = -20;
188 if (niceval > 19) 188 if (niceval > 19)
189 niceval = 19; 189 niceval = 19;
190 190
191 rcu_read_lock(); 191 rcu_read_lock();
192 read_lock(&tasklist_lock); 192 read_lock(&tasklist_lock);
193 switch (which) { 193 switch (which) {
194 case PRIO_PROCESS: 194 case PRIO_PROCESS:
195 if (who) 195 if (who)
196 p = find_task_by_vpid(who); 196 p = find_task_by_vpid(who);
197 else 197 else
198 p = current; 198 p = current;
199 if (p) 199 if (p)
200 error = set_one_prio(p, niceval, error); 200 error = set_one_prio(p, niceval, error);
201 break; 201 break;
202 case PRIO_PGRP: 202 case PRIO_PGRP:
203 if (who) 203 if (who)
204 pgrp = find_vpid(who); 204 pgrp = find_vpid(who);
205 else 205 else
206 pgrp = task_pgrp(current); 206 pgrp = task_pgrp(current);
207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
208 error = set_one_prio(p, niceval, error); 208 error = set_one_prio(p, niceval, error);
209 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 209 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
210 break; 210 break;
211 case PRIO_USER: 211 case PRIO_USER:
212 uid = make_kuid(cred->user_ns, who); 212 uid = make_kuid(cred->user_ns, who);
213 user = cred->user; 213 user = cred->user;
214 if (!who) 214 if (!who)
215 uid = cred->uid; 215 uid = cred->uid;
216 else if (!uid_eq(uid, cred->uid) && 216 else if (!uid_eq(uid, cred->uid) &&
217 !(user = find_user(uid))) 217 !(user = find_user(uid)))
218 goto out_unlock; /* No processes for this user */ 218 goto out_unlock; /* No processes for this user */
219 219
220 do_each_thread(g, p) { 220 do_each_thread(g, p) {
221 if (uid_eq(task_uid(p), uid)) 221 if (uid_eq(task_uid(p), uid))
222 error = set_one_prio(p, niceval, error); 222 error = set_one_prio(p, niceval, error);
223 } while_each_thread(g, p); 223 } while_each_thread(g, p);
224 if (!uid_eq(uid, cred->uid)) 224 if (!uid_eq(uid, cred->uid))
225 free_uid(user); /* For find_user() */ 225 free_uid(user); /* For find_user() */
226 break; 226 break;
227 } 227 }
228 out_unlock: 228 out_unlock:
229 read_unlock(&tasklist_lock); 229 read_unlock(&tasklist_lock);
230 rcu_read_unlock(); 230 rcu_read_unlock();
231 out: 231 out:
232 return error; 232 return error;
233 } 233 }
234 234
235 /* 235 /*
236 * Ugh. To avoid negative return values, "getpriority()" will 236 * Ugh. To avoid negative return values, "getpriority()" will
237 * not return the normal nice-value, but a negated value that 237 * not return the normal nice-value, but a negated value that
238 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 238 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
239 * to stay compatible. 239 * to stay compatible.
240 */ 240 */
241 SYSCALL_DEFINE2(getpriority, int, which, int, who) 241 SYSCALL_DEFINE2(getpriority, int, which, int, who)
242 { 242 {
243 struct task_struct *g, *p; 243 struct task_struct *g, *p;
244 struct user_struct *user; 244 struct user_struct *user;
245 const struct cred *cred = current_cred(); 245 const struct cred *cred = current_cred();
246 long niceval, retval = -ESRCH; 246 long niceval, retval = -ESRCH;
247 struct pid *pgrp; 247 struct pid *pgrp;
248 kuid_t uid; 248 kuid_t uid;
249 249
250 if (which > PRIO_USER || which < PRIO_PROCESS) 250 if (which > PRIO_USER || which < PRIO_PROCESS)
251 return -EINVAL; 251 return -EINVAL;
252 252
253 rcu_read_lock(); 253 rcu_read_lock();
254 read_lock(&tasklist_lock); 254 read_lock(&tasklist_lock);
255 switch (which) { 255 switch (which) {
256 case PRIO_PROCESS: 256 case PRIO_PROCESS:
257 if (who) 257 if (who)
258 p = find_task_by_vpid(who); 258 p = find_task_by_vpid(who);
259 else 259 else
260 p = current; 260 p = current;
261 if (p) { 261 if (p) {
262 niceval = 20 - task_nice(p); 262 niceval = 20 - task_nice(p);
263 if (niceval > retval) 263 if (niceval > retval)
264 retval = niceval; 264 retval = niceval;
265 } 265 }
266 break; 266 break;
267 case PRIO_PGRP: 267 case PRIO_PGRP:
268 if (who) 268 if (who)
269 pgrp = find_vpid(who); 269 pgrp = find_vpid(who);
270 else 270 else
271 pgrp = task_pgrp(current); 271 pgrp = task_pgrp(current);
272 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 272 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
273 niceval = 20 - task_nice(p); 273 niceval = 20 - task_nice(p);
274 if (niceval > retval) 274 if (niceval > retval)
275 retval = niceval; 275 retval = niceval;
276 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 276 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
277 break; 277 break;
278 case PRIO_USER: 278 case PRIO_USER:
279 uid = make_kuid(cred->user_ns, who); 279 uid = make_kuid(cred->user_ns, who);
280 user = cred->user; 280 user = cred->user;
281 if (!who) 281 if (!who)
282 uid = cred->uid; 282 uid = cred->uid;
283 else if (!uid_eq(uid, cred->uid) && 283 else if (!uid_eq(uid, cred->uid) &&
284 !(user = find_user(uid))) 284 !(user = find_user(uid)))
285 goto out_unlock; /* No processes for this user */ 285 goto out_unlock; /* No processes for this user */
286 286
287 do_each_thread(g, p) { 287 do_each_thread(g, p) {
288 if (uid_eq(task_uid(p), uid)) { 288 if (uid_eq(task_uid(p), uid)) {
289 niceval = 20 - task_nice(p); 289 niceval = 20 - task_nice(p);
290 if (niceval > retval) 290 if (niceval > retval)
291 retval = niceval; 291 retval = niceval;
292 } 292 }
293 } while_each_thread(g, p); 293 } while_each_thread(g, p);
294 if (!uid_eq(uid, cred->uid)) 294 if (!uid_eq(uid, cred->uid))
295 free_uid(user); /* for find_user() */ 295 free_uid(user); /* for find_user() */
296 break; 296 break;
297 } 297 }
298 out_unlock: 298 out_unlock:
299 read_unlock(&tasklist_lock); 299 read_unlock(&tasklist_lock);
300 rcu_read_unlock(); 300 rcu_read_unlock();
301 301
302 return retval; 302 return retval;
303 } 303 }
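
A worked example of the offset convention above: a task at nice -5 is reported as 25 and a task at nice 10 as 10, so userspace (normally the C library) converts back with 20 - value. A small raw-syscall illustration, assuming a Linux libc that defines SYS_getpriority:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/resource.h>

int main(void)
{
	/* The raw syscall returns 20 - nice (i.e. 40..1); errors are < 0. */
	long raw = syscall(SYS_getpriority, PRIO_PROCESS, 0);

	if (raw < 0) {
		perror("getpriority");
		return 1;
	}
	printf("raw=%ld nice=%ld\n", raw, 20 - raw);
	return 0;
}
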
304 304
305 /** 305 /**
306 * emergency_restart - reboot the system 306 * emergency_restart - reboot the system
307 * 307 *
308 * Without shutting down any hardware or taking any locks 308 * Without shutting down any hardware or taking any locks
309 * reboot the system. This is called when we know we are in 309 * reboot the system. This is called when we know we are in
310 * trouble so this is our best effort to reboot. This is 310 * trouble so this is our best effort to reboot. This is
311 * safe to call in interrupt context. 311 * safe to call in interrupt context.
312 */ 312 */
313 void emergency_restart(void) 313 void emergency_restart(void)
314 { 314 {
315 kmsg_dump(KMSG_DUMP_EMERG); 315 kmsg_dump(KMSG_DUMP_EMERG);
316 machine_emergency_restart(); 316 machine_emergency_restart();
317 } 317 }
318 EXPORT_SYMBOL_GPL(emergency_restart); 318 EXPORT_SYMBOL_GPL(emergency_restart);
319 319
320 void kernel_restart_prepare(char *cmd) 320 void kernel_restart_prepare(char *cmd)
321 { 321 {
322 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 322 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
323 system_state = SYSTEM_RESTART; 323 system_state = SYSTEM_RESTART;
324 usermodehelper_disable(); 324 usermodehelper_disable();
325 device_shutdown(); 325 device_shutdown();
326 syscore_shutdown(); 326 syscore_shutdown();
327 } 327 }
328 328
329 /** 329 /**
330 * register_reboot_notifier - Register function to be called at reboot time 330 * register_reboot_notifier - Register function to be called at reboot time
331 * @nb: Info about notifier function to be called 331 * @nb: Info about notifier function to be called
332 * 332 *
333 * Registers a function with the list of functions 333 * Registers a function with the list of functions
334 * to be called at reboot time. 334 * to be called at reboot time.
335 * 335 *
336 * Currently always returns zero, as blocking_notifier_chain_register() 336 * Currently always returns zero, as blocking_notifier_chain_register()
337 * always returns zero. 337 * always returns zero.
338 */ 338 */
339 int register_reboot_notifier(struct notifier_block *nb) 339 int register_reboot_notifier(struct notifier_block *nb)
340 { 340 {
341 return blocking_notifier_chain_register(&reboot_notifier_list, nb); 341 return blocking_notifier_chain_register(&reboot_notifier_list, nb);
342 } 342 }
343 EXPORT_SYMBOL(register_reboot_notifier); 343 EXPORT_SYMBOL(register_reboot_notifier);
344 344
345 /** 345 /**
346 * unregister_reboot_notifier - Unregister previously registered reboot notifier 346 * unregister_reboot_notifier - Unregister previously registered reboot notifier
347 * @nb: Hook to be unregistered 347 * @nb: Hook to be unregistered
348 * 348 *
349 * Unregisters a previously registered reboot 349 * Unregisters a previously registered reboot
350 * notifier function. 350 * notifier function.
351 * 351 *
352 * Returns zero on success, or %-ENOENT on failure. 352 * Returns zero on success, or %-ENOENT on failure.
353 */ 353 */
354 int unregister_reboot_notifier(struct notifier_block *nb) 354 int unregister_reboot_notifier(struct notifier_block *nb)
355 { 355 {
356 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); 356 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
357 } 357 }
358 EXPORT_SYMBOL(unregister_reboot_notifier); 358 EXPORT_SYMBOL(unregister_reboot_notifier);
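
For context, a caller registers on the reboot chain by embedding a notifier_block whose callback has the standard notifier signature; a minimal illustrative sketch (the names here are invented for the example):

#include <linux/notifier.h>
#include <linux/reboot.h>

/* Called with SYS_RESTART, SYS_HALT or SYS_POWER_OFF as @action. */
static int example_reboot_cb(struct notifier_block *nb, unsigned long action, void *data)
{
	return NOTIFY_DONE;
}

static struct notifier_block example_reboot_nb = {
	.notifier_call = example_reboot_cb,
};

/* Typically from module init/exit paths: */
/*	register_reboot_notifier(&example_reboot_nb);	*/
/*	unregister_reboot_notifier(&example_reboot_nb);	*/
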
359 359
360 /** 360 /**
361 * kernel_restart - reboot the system 361 * kernel_restart - reboot the system
362 * @cmd: pointer to buffer containing command to execute for restart 362 * @cmd: pointer to buffer containing command to execute for restart
363 * or %NULL 363 * or %NULL
364 * 364 *
365 * Shutdown everything and perform a clean reboot. 365 * Shutdown everything and perform a clean reboot.
366 * This is not safe to call in interrupt context. 366 * This is not safe to call in interrupt context.
367 */ 367 */
368 void kernel_restart(char *cmd) 368 void kernel_restart(char *cmd)
369 { 369 {
370 kernel_restart_prepare(cmd); 370 kernel_restart_prepare(cmd);
371 disable_nonboot_cpus(); 371 disable_nonboot_cpus();
372 if (!cmd) 372 if (!cmd)
373 printk(KERN_EMERG "Restarting system.\n"); 373 printk(KERN_EMERG "Restarting system.\n");
374 else 374 else
375 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 375 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
376 kmsg_dump(KMSG_DUMP_RESTART); 376 kmsg_dump(KMSG_DUMP_RESTART);
377 machine_restart(cmd); 377 machine_restart(cmd);
378 } 378 }
379 EXPORT_SYMBOL_GPL(kernel_restart); 379 EXPORT_SYMBOL_GPL(kernel_restart);
380 380
381 static void kernel_shutdown_prepare(enum system_states state) 381 static void kernel_shutdown_prepare(enum system_states state)
382 { 382 {
383 blocking_notifier_call_chain(&reboot_notifier_list, 383 blocking_notifier_call_chain(&reboot_notifier_list,
384 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 384 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
385 system_state = state; 385 system_state = state;
386 usermodehelper_disable(); 386 usermodehelper_disable();
387 device_shutdown(); 387 device_shutdown();
388 } 388 }
389 /** 389 /**
390 * kernel_halt - halt the system 390 * kernel_halt - halt the system
391 * 391 *
392 * Shutdown everything and perform a clean system halt. 392 * Shutdown everything and perform a clean system halt.
393 */ 393 */
394 void kernel_halt(void) 394 void kernel_halt(void)
395 { 395 {
396 kernel_shutdown_prepare(SYSTEM_HALT); 396 kernel_shutdown_prepare(SYSTEM_HALT);
397 syscore_shutdown(); 397 syscore_shutdown();
398 printk(KERN_EMERG "System halted.\n"); 398 printk(KERN_EMERG "System halted.\n");
399 kmsg_dump(KMSG_DUMP_HALT); 399 kmsg_dump(KMSG_DUMP_HALT);
400 machine_halt(); 400 machine_halt();
401 } 401 }
402 402
403 EXPORT_SYMBOL_GPL(kernel_halt); 403 EXPORT_SYMBOL_GPL(kernel_halt);
404 404
405 /** 405 /**
406 * kernel_power_off - power_off the system 406 * kernel_power_off - power_off the system
407 * 407 *
408 * Shutdown everything and perform a clean system power_off. 408 * Shutdown everything and perform a clean system power_off.
409 */ 409 */
410 void kernel_power_off(void) 410 void kernel_power_off(void)
411 { 411 {
412 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 412 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
413 if (pm_power_off_prepare) 413 if (pm_power_off_prepare)
414 pm_power_off_prepare(); 414 pm_power_off_prepare();
415 disable_nonboot_cpus(); 415 disable_nonboot_cpus();
416 syscore_shutdown(); 416 syscore_shutdown();
417 printk(KERN_EMERG "Power down.\n"); 417 printk(KERN_EMERG "Power down.\n");
418 kmsg_dump(KMSG_DUMP_POWEROFF); 418 kmsg_dump(KMSG_DUMP_POWEROFF);
419 machine_power_off(); 419 machine_power_off();
420 } 420 }
421 EXPORT_SYMBOL_GPL(kernel_power_off); 421 EXPORT_SYMBOL_GPL(kernel_power_off);
422 422
423 static DEFINE_MUTEX(reboot_mutex); 423 static DEFINE_MUTEX(reboot_mutex);
424 424
425 /* 425 /*
426 * Reboot system call: for obvious reasons only root may call it, 426 * Reboot system call: for obvious reasons only root may call it,
427 * and even root needs to set up some magic numbers in the registers 427 * and even root needs to set up some magic numbers in the registers
428 * so that some mistake won't make this reboot the whole machine. 428 * so that some mistake won't make this reboot the whole machine.
429 * You can also set the meaning of the ctrl-alt-del-key here. 429 * You can also set the meaning of the ctrl-alt-del-key here.
430 * 430 *
431 * reboot doesn't sync: do that yourself before calling this. 431 * reboot doesn't sync: do that yourself before calling this.
432 */ 432 */
433 SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, 433 SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
434 void __user *, arg) 434 void __user *, arg)
435 { 435 {
436 char buffer[256]; 436 char buffer[256];
437 int ret = 0; 437 int ret = 0;
438 438
439 /* We only trust the superuser with rebooting the system. */ 439 /* We only trust the superuser with rebooting the system. */
440 if (!capable(CAP_SYS_BOOT)) 440 if (!capable(CAP_SYS_BOOT))
441 return -EPERM; 441 return -EPERM;
442 442
443 /* For safety, we require "magic" arguments. */ 443 /* For safety, we require "magic" arguments. */
444 if (magic1 != LINUX_REBOOT_MAGIC1 || 444 if (magic1 != LINUX_REBOOT_MAGIC1 ||
445 (magic2 != LINUX_REBOOT_MAGIC2 && 445 (magic2 != LINUX_REBOOT_MAGIC2 &&
446 magic2 != LINUX_REBOOT_MAGIC2A && 446 magic2 != LINUX_REBOOT_MAGIC2A &&
447 magic2 != LINUX_REBOOT_MAGIC2B && 447 magic2 != LINUX_REBOOT_MAGIC2B &&
448 magic2 != LINUX_REBOOT_MAGIC2C)) 448 magic2 != LINUX_REBOOT_MAGIC2C))
449 return -EINVAL; 449 return -EINVAL;
450 450
451 /* 451 /*
452 * If pid namespaces are enabled and the current task is in a child 452 * If pid namespaces are enabled and the current task is in a child
453 * pid_namespace, the command is handled by reboot_pid_ns() which will 453 * pid_namespace, the command is handled by reboot_pid_ns() which will
454 * call do_exit(). 454 * call do_exit().
455 */ 455 */
456 ret = reboot_pid_ns(task_active_pid_ns(current), cmd); 456 ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
457 if (ret) 457 if (ret)
458 return ret; 458 return ret;
459 459
460 /* Instead of trying to make the power_off code look like 460 /* Instead of trying to make the power_off code look like
461 * halt when pm_power_off is not set do it the easy way. 461 * halt when pm_power_off is not set do it the easy way.
462 */ 462 */
463 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) 463 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
464 cmd = LINUX_REBOOT_CMD_HALT; 464 cmd = LINUX_REBOOT_CMD_HALT;
465 465
466 mutex_lock(&reboot_mutex); 466 mutex_lock(&reboot_mutex);
467 switch (cmd) { 467 switch (cmd) {
468 case LINUX_REBOOT_CMD_RESTART: 468 case LINUX_REBOOT_CMD_RESTART:
469 kernel_restart(NULL); 469 kernel_restart(NULL);
470 break; 470 break;
471 471
472 case LINUX_REBOOT_CMD_CAD_ON: 472 case LINUX_REBOOT_CMD_CAD_ON:
473 C_A_D = 1; 473 C_A_D = 1;
474 break; 474 break;
475 475
476 case LINUX_REBOOT_CMD_CAD_OFF: 476 case LINUX_REBOOT_CMD_CAD_OFF:
477 C_A_D = 0; 477 C_A_D = 0;
478 break; 478 break;
479 479
480 case LINUX_REBOOT_CMD_HALT: 480 case LINUX_REBOOT_CMD_HALT:
481 kernel_halt(); 481 kernel_halt();
482 do_exit(0); 482 do_exit(0);
483 panic("cannot halt"); 483 panic("cannot halt");
484 484
485 case LINUX_REBOOT_CMD_POWER_OFF: 485 case LINUX_REBOOT_CMD_POWER_OFF:
486 kernel_power_off(); 486 kernel_power_off();
487 do_exit(0); 487 do_exit(0);
488 break; 488 break;
489 489
490 case LINUX_REBOOT_CMD_RESTART2: 490 case LINUX_REBOOT_CMD_RESTART2:
491 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { 491 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
492 ret = -EFAULT; 492 ret = -EFAULT;
493 break; 493 break;
494 } 494 }
495 buffer[sizeof(buffer) - 1] = '\0'; 495 buffer[sizeof(buffer) - 1] = '\0';
496 496
497 kernel_restart(buffer); 497 kernel_restart(buffer);
498 break; 498 break;
499 499
500 #ifdef CONFIG_KEXEC 500 #ifdef CONFIG_KEXEC
501 case LINUX_REBOOT_CMD_KEXEC: 501 case LINUX_REBOOT_CMD_KEXEC:
502 ret = kernel_kexec(); 502 ret = kernel_kexec();
503 break; 503 break;
504 #endif 504 #endif
505 505
506 #ifdef CONFIG_HIBERNATION 506 #ifdef CONFIG_HIBERNATION
507 case LINUX_REBOOT_CMD_SW_SUSPEND: 507 case LINUX_REBOOT_CMD_SW_SUSPEND:
508 ret = hibernate(); 508 ret = hibernate();
509 break; 509 break;
510 #endif 510 #endif
511 511
512 default: 512 default:
513 ret = -EINVAL; 513 ret = -EINVAL;
514 break; 514 break;
515 } 515 }
516 mutex_unlock(&reboot_mutex); 516 mutex_unlock(&reboot_mutex);
517 return ret; 517 return ret;
518 } 518 }
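
From userspace every call must supply the magic constants checked above; a hedged example of requesting a restart with a command string through the raw syscall (requires CAP_SYS_BOOT, and it really does reboot; the command string is purely illustrative):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/reboot.h>

int main(void)
{
	/* RESTART2 passes an arbitrary command string down to machine_restart(). */
	if (syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
		    LINUX_REBOOT_CMD_RESTART2, "bootloader") < 0) {
		perror("reboot");
		return 1;
	}
	return 0;
}
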
519 519
520 static void deferred_cad(struct work_struct *dummy) 520 static void deferred_cad(struct work_struct *dummy)
521 { 521 {
522 kernel_restart(NULL); 522 kernel_restart(NULL);
523 } 523 }
524 524
525 /* 525 /*
526 * This function gets called by ctrl-alt-del - ie the keyboard interrupt. 526 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
527 * As it's called within an interrupt, it may NOT sync: the only choice 527 * As it's called within an interrupt, it may NOT sync: the only choice
528 * is whether to reboot at once, or just ignore the ctrl-alt-del. 528 * is whether to reboot at once, or just ignore the ctrl-alt-del.
529 */ 529 */
530 void ctrl_alt_del(void) 530 void ctrl_alt_del(void)
531 { 531 {
532 static DECLARE_WORK(cad_work, deferred_cad); 532 static DECLARE_WORK(cad_work, deferred_cad);
533 533
534 if (C_A_D) 534 if (C_A_D)
535 schedule_work(&cad_work); 535 schedule_work(&cad_work);
536 else 536 else
537 kill_cad_pid(SIGINT, 1); 537 kill_cad_pid(SIGINT, 1);
538 } 538 }
539 539
540 /* 540 /*
541 * Unprivileged users may change the real gid to the effective gid 541 * Unprivileged users may change the real gid to the effective gid
542 * or vice versa. (BSD-style) 542 * or vice versa. (BSD-style)
543 * 543 *
544 * If you set the real gid at all, or set the effective gid to a value not 544 * If you set the real gid at all, or set the effective gid to a value not
545 * equal to the real gid, then the saved gid is set to the new effective gid. 545 * equal to the real gid, then the saved gid is set to the new effective gid.
546 * 546 *
547 * This makes it possible for a setgid program to completely drop its 547 * This makes it possible for a setgid program to completely drop its
548 * privileges, which is often a useful assertion to make when you are doing 548 * privileges, which is often a useful assertion to make when you are doing
549 * a security audit over a program. 549 * a security audit over a program.
550 * 550 *
551 * The general idea is that a program which uses just setregid() will be 551 * The general idea is that a program which uses just setregid() will be
552 * 100% compatible with BSD. A program which uses just setgid() will be 552 * 100% compatible with BSD. A program which uses just setgid() will be
553 * 100% compatible with POSIX with saved IDs. 553 * 100% compatible with POSIX with saved IDs.
554 * 554 *
555 * SMP: There are no races; the GIDs are checked only by filesystem 555 * SMP: There are no races; the GIDs are checked only by filesystem
556 * operations (as far as semantic preservation is concerned). 556 * operations (as far as semantic preservation is concerned).
557 */ 557 */
558 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) 558 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
559 { 559 {
560 struct user_namespace *ns = current_user_ns(); 560 struct user_namespace *ns = current_user_ns();
561 const struct cred *old; 561 const struct cred *old;
562 struct cred *new; 562 struct cred *new;
563 int retval; 563 int retval;
564 kgid_t krgid, kegid; 564 kgid_t krgid, kegid;
565 565
566 krgid = make_kgid(ns, rgid); 566 krgid = make_kgid(ns, rgid);
567 kegid = make_kgid(ns, egid); 567 kegid = make_kgid(ns, egid);
568 568
569 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 569 if ((rgid != (gid_t) -1) && !gid_valid(krgid))
570 return -EINVAL; 570 return -EINVAL;
571 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 571 if ((egid != (gid_t) -1) && !gid_valid(kegid))
572 return -EINVAL; 572 return -EINVAL;
573 573
574 new = prepare_creds(); 574 new = prepare_creds();
575 if (!new) 575 if (!new)
576 return -ENOMEM; 576 return -ENOMEM;
577 old = current_cred(); 577 old = current_cred();
578 578
579 retval = -EPERM; 579 retval = -EPERM;
580 if (rgid != (gid_t) -1) { 580 if (rgid != (gid_t) -1) {
581 if (gid_eq(old->gid, krgid) || 581 if (gid_eq(old->gid, krgid) ||
582 gid_eq(old->egid, krgid) || 582 gid_eq(old->egid, krgid) ||
583 nsown_capable(CAP_SETGID)) 583 nsown_capable(CAP_SETGID))
584 new->gid = krgid; 584 new->gid = krgid;
585 else 585 else
586 goto error; 586 goto error;
587 } 587 }
588 if (egid != (gid_t) -1) { 588 if (egid != (gid_t) -1) {
589 if (gid_eq(old->gid, kegid) || 589 if (gid_eq(old->gid, kegid) ||
590 gid_eq(old->egid, kegid) || 590 gid_eq(old->egid, kegid) ||
591 gid_eq(old->sgid, kegid) || 591 gid_eq(old->sgid, kegid) ||
592 nsown_capable(CAP_SETGID)) 592 nsown_capable(CAP_SETGID))
593 new->egid = kegid; 593 new->egid = kegid;
594 else 594 else
595 goto error; 595 goto error;
596 } 596 }
597 597
598 if (rgid != (gid_t) -1 || 598 if (rgid != (gid_t) -1 ||
599 (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) 599 (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
600 new->sgid = new->egid; 600 new->sgid = new->egid;
601 new->fsgid = new->egid; 601 new->fsgid = new->egid;
602 602
603 return commit_creds(new); 603 return commit_creds(new);
604 604
605 error: 605 error:
606 abort_creds(new); 606 abort_creds(new);
607 return retval; 607 return retval;
608 } 608 }
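
The rule that setting the real gid also resets the saved gid is what lets a setgid binary drop its elevated group for good; a minimal userspace sketch of that pattern (illustrative only):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* In a setgid binary, getgid() is the invoking user's real group.
	 * Setting both real and effective gid to it also resets the saved
	 * gid, so the setgid group cannot be regained later. */
	if (setregid(getgid(), getgid()) < 0) {
		perror("setregid");
		return 1;
	}
	return 0;
}
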
609 609
610 /* 610 /*
611 * setgid() is implemented like SysV w/ SAVED_IDS 611 * setgid() is implemented like SysV w/ SAVED_IDS
612 * 612 *
613 * SMP: Same implicit races as above. 613 * SMP: Same implicit races as above.
614 */ 614 */
615 SYSCALL_DEFINE1(setgid, gid_t, gid) 615 SYSCALL_DEFINE1(setgid, gid_t, gid)
616 { 616 {
617 struct user_namespace *ns = current_user_ns(); 617 struct user_namespace *ns = current_user_ns();
618 const struct cred *old; 618 const struct cred *old;
619 struct cred *new; 619 struct cred *new;
620 int retval; 620 int retval;
621 kgid_t kgid; 621 kgid_t kgid;
622 622
623 kgid = make_kgid(ns, gid); 623 kgid = make_kgid(ns, gid);
624 if (!gid_valid(kgid)) 624 if (!gid_valid(kgid))
625 return -EINVAL; 625 return -EINVAL;
626 626
627 new = prepare_creds(); 627 new = prepare_creds();
628 if (!new) 628 if (!new)
629 return -ENOMEM; 629 return -ENOMEM;
630 old = current_cred(); 630 old = current_cred();
631 631
632 retval = -EPERM; 632 retval = -EPERM;
633 if (nsown_capable(CAP_SETGID)) 633 if (nsown_capable(CAP_SETGID))
634 new->gid = new->egid = new->sgid = new->fsgid = kgid; 634 new->gid = new->egid = new->sgid = new->fsgid = kgid;
635 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) 635 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
636 new->egid = new->fsgid = kgid; 636 new->egid = new->fsgid = kgid;
637 else 637 else
638 goto error; 638 goto error;
639 639
640 return commit_creds(new); 640 return commit_creds(new);
641 641
642 error: 642 error:
643 abort_creds(new); 643 abort_creds(new);
644 return retval; 644 return retval;
645 } 645 }
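
For context, a minimal userspace sketch (not part of this diff) of the rule implemented just above: because rgid is supplied explicitly, the saved gid follows the new effective gid, so a setgid helper can drop its elevated group permanently. Error handling is simplified and setgroups() is shown only for completeness (it may additionally require CAP_SETGID).

#include <grp.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Illustrative only: a setgid helper dropping its elevated group for good.
 * Because rgid is passed, the kernel also rewrites the saved gid
 * (new->sgid = new->egid), so the old group cannot be regained. */
int main(void)
{
    gid_t rgid = getgid();

    if (setgroups(1, &rgid) != 0)        /* shrink supplementary groups too */
        perror("setgroups");
    if (setregid(rgid, rgid) != 0) {     /* always check set*id() return values */
        perror("setregid");
        return 1;
    }
    printf("gid=%d egid=%d\n", (int)getgid(), (int)getegid());
    return 0;
}
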
646 646
647 /* 647 /*
648 * change the user struct in a credentials set to match the new UID 648 * change the user struct in a credentials set to match the new UID
649 */ 649 */
650 static int set_user(struct cred *new) 650 static int set_user(struct cred *new)
651 { 651 {
652 struct user_struct *new_user; 652 struct user_struct *new_user;
653 653
654 new_user = alloc_uid(new->uid); 654 new_user = alloc_uid(new->uid);
655 if (!new_user) 655 if (!new_user)
656 return -EAGAIN; 656 return -EAGAIN;
657 657
658 /* 658 /*
659 * We don't fail in case of NPROC limit excess here because too many 659 * We don't fail in case of NPROC limit excess here because too many
660 * poorly written programs don't check set*uid() return code, assuming 660 * poorly written programs don't check set*uid() return code, assuming
661 * it never fails if called by root. We may still enforce NPROC limit 661 * it never fails if called by root. We may still enforce NPROC limit
662 * for programs doing set*uid()+execve() by harmlessly deferring the 662 * for programs doing set*uid()+execve() by harmlessly deferring the
663 * failure to the execve() stage. 663 * failure to the execve() stage.
664 */ 664 */
665 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && 665 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
666 new_user != INIT_USER) 666 new_user != INIT_USER)
667 current->flags |= PF_NPROC_EXCEEDED; 667 current->flags |= PF_NPROC_EXCEEDED;
668 else 668 else
669 current->flags &= ~PF_NPROC_EXCEEDED; 669 current->flags &= ~PF_NPROC_EXCEEDED;
670 670
671 free_uid(new->user); 671 free_uid(new->user);
672 new->user = new_user; 672 new->user = new_user;
673 return 0; 673 return 0;
674 } 674 }
675 675
676 /* 676 /*
677 * Unprivileged users may change the real uid to the effective uid 677 * Unprivileged users may change the real uid to the effective uid
678 * or vice versa. (BSD-style) 678 * or vice versa. (BSD-style)
679 * 679 *
680 * If you set the real uid at all, or set the effective uid to a value not 680 * If you set the real uid at all, or set the effective uid to a value not
681 * equal to the real uid, then the saved uid is set to the new effective uid. 681 * equal to the real uid, then the saved uid is set to the new effective uid.
682 * 682 *
683 * This makes it possible for a setuid program to completely drop its 683 * This makes it possible for a setuid program to completely drop its
684 * privileges, which is often a useful assertion to make when you are doing 684 * privileges, which is often a useful assertion to make when you are doing
685 * a security audit over a program. 685 * a security audit over a program.
686 * 686 *
687 * The general idea is that a program which uses just setreuid() will be 687 * The general idea is that a program which uses just setreuid() will be
688 * 100% compatible with BSD. A program which uses just setuid() will be 688 * 100% compatible with BSD. A program which uses just setuid() will be
689 * 100% compatible with POSIX with saved IDs. 689 * 100% compatible with POSIX with saved IDs.
690 */ 690 */
691 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) 691 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
692 { 692 {
693 struct user_namespace *ns = current_user_ns(); 693 struct user_namespace *ns = current_user_ns();
694 const struct cred *old; 694 const struct cred *old;
695 struct cred *new; 695 struct cred *new;
696 int retval; 696 int retval;
697 kuid_t kruid, keuid; 697 kuid_t kruid, keuid;
698 698
699 kruid = make_kuid(ns, ruid); 699 kruid = make_kuid(ns, ruid);
700 keuid = make_kuid(ns, euid); 700 keuid = make_kuid(ns, euid);
701 701
702 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 702 if ((ruid != (uid_t) -1) && !uid_valid(kruid))
703 return -EINVAL; 703 return -EINVAL;
704 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 704 if ((euid != (uid_t) -1) && !uid_valid(keuid))
705 return -EINVAL; 705 return -EINVAL;
706 706
707 new = prepare_creds(); 707 new = prepare_creds();
708 if (!new) 708 if (!new)
709 return -ENOMEM; 709 return -ENOMEM;
710 old = current_cred(); 710 old = current_cred();
711 711
712 retval = -EPERM; 712 retval = -EPERM;
713 if (ruid != (uid_t) -1) { 713 if (ruid != (uid_t) -1) {
714 new->uid = kruid; 714 new->uid = kruid;
715 if (!uid_eq(old->uid, kruid) && 715 if (!uid_eq(old->uid, kruid) &&
716 !uid_eq(old->euid, kruid) && 716 !uid_eq(old->euid, kruid) &&
717 !nsown_capable(CAP_SETUID)) 717 !nsown_capable(CAP_SETUID))
718 goto error; 718 goto error;
719 } 719 }
720 720
721 if (euid != (uid_t) -1) { 721 if (euid != (uid_t) -1) {
722 new->euid = keuid; 722 new->euid = keuid;
723 if (!uid_eq(old->uid, keuid) && 723 if (!uid_eq(old->uid, keuid) &&
724 !uid_eq(old->euid, keuid) && 724 !uid_eq(old->euid, keuid) &&
725 !uid_eq(old->suid, keuid) && 725 !uid_eq(old->suid, keuid) &&
726 !nsown_capable(CAP_SETUID)) 726 !nsown_capable(CAP_SETUID))
727 goto error; 727 goto error;
728 } 728 }
729 729
730 if (!uid_eq(new->uid, old->uid)) { 730 if (!uid_eq(new->uid, old->uid)) {
731 retval = set_user(new); 731 retval = set_user(new);
732 if (retval < 0) 732 if (retval < 0)
733 goto error; 733 goto error;
734 } 734 }
735 if (ruid != (uid_t) -1 || 735 if (ruid != (uid_t) -1 ||
736 (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) 736 (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
737 new->suid = new->euid; 737 new->suid = new->euid;
738 new->fsuid = new->euid; 738 new->fsuid = new->euid;
739 739
740 retval = security_task_fix_setuid(new, old, LSM_SETID_RE); 740 retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
741 if (retval < 0) 741 if (retval < 0)
742 goto error; 742 goto error;
743 743
744 return commit_creds(new); 744 return commit_creds(new);
745 745
746 error: 746 error:
747 abort_creds(new); 747 abort_creds(new);
748 return retval; 748 return retval;
749 } 749 }
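
As a userspace illustration of the BSD-style behaviour described in the comment above (a sketch, not a recommendation), a setuid-root binary started by an ordinary user can swap its real and effective uids to drop privilege temporarily and swap back later:

#include <stdio.h>
#include <unistd.h>

/* Assumes a setuid-root binary run by an ordinary user, so initially
 * ruid == user and euid == 0. */
static void show(const char *when)
{
    printf("%s: ruid=%d euid=%d\n", when, (int)getuid(), (int)geteuid());
}

int main(void)
{
    uid_t user = getuid();

    show("start");
    if (setreuid(0, user) != 0)    /* drop: euid becomes the user, root kept in ruid */
        perror("setreuid(drop)");
    show("dropped");
    if (setreuid(user, 0) != 0)    /* regain: swap back, euid is root again */
        perror("setreuid(regain)");
    show("regained");
    return 0;
}
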
750 750
751 /* 751 /*
752 * setuid() is implemented like SysV with SAVED_IDS 752 * setuid() is implemented like SysV with SAVED_IDS
753 * 753 *
754 * Note that SAVED_ID's is deficient in that a setuid root program 754 * Note that SAVED_ID's is deficient in that a setuid root program
755 * like sendmail, for example, cannot set its uid to be a normal 755 * like sendmail, for example, cannot set its uid to be a normal
756 * user and then switch back, because if you're root, setuid() sets 756 * user and then switch back, because if you're root, setuid() sets
757 * the saved uid too. If you don't like this, blame the bright people 757 * the saved uid too. If you don't like this, blame the bright people
758 * in the POSIX committee and/or USG. Note that the BSD-style setreuid() 758 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
759 * will allow a root program to temporarily drop privileges and be able to 759 * will allow a root program to temporarily drop privileges and be able to
760 * regain them by swapping the real and effective uid. 760 * regain them by swapping the real and effective uid.
761 */ 761 */
762 SYSCALL_DEFINE1(setuid, uid_t, uid) 762 SYSCALL_DEFINE1(setuid, uid_t, uid)
763 { 763 {
764 struct user_namespace *ns = current_user_ns(); 764 struct user_namespace *ns = current_user_ns();
765 const struct cred *old; 765 const struct cred *old;
766 struct cred *new; 766 struct cred *new;
767 int retval; 767 int retval;
768 kuid_t kuid; 768 kuid_t kuid;
769 769
770 kuid = make_kuid(ns, uid); 770 kuid = make_kuid(ns, uid);
771 if (!uid_valid(kuid)) 771 if (!uid_valid(kuid))
772 return -EINVAL; 772 return -EINVAL;
773 773
774 new = prepare_creds(); 774 new = prepare_creds();
775 if (!new) 775 if (!new)
776 return -ENOMEM; 776 return -ENOMEM;
777 old = current_cred(); 777 old = current_cred();
778 778
779 retval = -EPERM; 779 retval = -EPERM;
780 if (nsown_capable(CAP_SETUID)) { 780 if (nsown_capable(CAP_SETUID)) {
781 new->suid = new->uid = kuid; 781 new->suid = new->uid = kuid;
782 if (!uid_eq(kuid, old->uid)) { 782 if (!uid_eq(kuid, old->uid)) {
783 retval = set_user(new); 783 retval = set_user(new);
784 if (retval < 0) 784 if (retval < 0)
785 goto error; 785 goto error;
786 } 786 }
787 } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { 787 } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
788 goto error; 788 goto error;
789 } 789 }
790 790
791 new->fsuid = new->euid = kuid; 791 new->fsuid = new->euid = kuid;
792 792
793 retval = security_task_fix_setuid(new, old, LSM_SETID_ID); 793 retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
794 if (retval < 0) 794 if (retval < 0)
795 goto error; 795 goto error;
796 796
797 return commit_creds(new); 797 return commit_creds(new);
798 798
799 error: 799 error:
800 abort_creds(new); 800 abort_creds(new);
801 return retval; 801 return retval;
802 } 802 }
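
A short sketch of the deficiency the comment above describes: when the caller is privileged, setuid() rewrites the real, effective and saved uid together, so the change is irreversible. The program is assumed to start as root; 65534 ("nobody") is just an example target uid.

#include <stdio.h>
#include <unistd.h>

int main(void)
{
    if (setuid(65534) != 0) {            /* permanent when running as root */
        perror("setuid");
        return 1;
    }
    /* Trying to come back fails: the saved uid is no longer 0. */
    if (setuid(0) != 0)
        perror("setuid(0) after drop");  /* expected: EPERM */
    printf("uid=%d euid=%d\n", (int)getuid(), (int)geteuid());
    return 0;
}
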
803 803
804 804
805 /* 805 /*
806 * This function implements a generic ability to update ruid, euid, 806 * This function implements a generic ability to update ruid, euid,
807 * and suid. This allows you to implement the 4.4 compatible seteuid(). 807 * and suid. This allows you to implement the 4.4 compatible seteuid().
808 */ 808 */
809 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) 809 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
810 { 810 {
811 struct user_namespace *ns = current_user_ns(); 811 struct user_namespace *ns = current_user_ns();
812 const struct cred *old; 812 const struct cred *old;
813 struct cred *new; 813 struct cred *new;
814 int retval; 814 int retval;
815 kuid_t kruid, keuid, ksuid; 815 kuid_t kruid, keuid, ksuid;
816 816
817 kruid = make_kuid(ns, ruid); 817 kruid = make_kuid(ns, ruid);
818 keuid = make_kuid(ns, euid); 818 keuid = make_kuid(ns, euid);
819 ksuid = make_kuid(ns, suid); 819 ksuid = make_kuid(ns, suid);
820 820
821 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 821 if ((ruid != (uid_t) -1) && !uid_valid(kruid))
822 return -EINVAL; 822 return -EINVAL;
823 823
824 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 824 if ((euid != (uid_t) -1) && !uid_valid(keuid))
825 return -EINVAL; 825 return -EINVAL;
826 826
827 if ((suid != (uid_t) -1) && !uid_valid(ksuid)) 827 if ((suid != (uid_t) -1) && !uid_valid(ksuid))
828 return -EINVAL; 828 return -EINVAL;
829 829
830 new = prepare_creds(); 830 new = prepare_creds();
831 if (!new) 831 if (!new)
832 return -ENOMEM; 832 return -ENOMEM;
833 833
834 old = current_cred(); 834 old = current_cred();
835 835
836 retval = -EPERM; 836 retval = -EPERM;
837 if (!nsown_capable(CAP_SETUID)) { 837 if (!nsown_capable(CAP_SETUID)) {
838 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && 838 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
839 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) 839 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
840 goto error; 840 goto error;
841 if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && 841 if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
842 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) 842 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
843 goto error; 843 goto error;
844 if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && 844 if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
845 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) 845 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
846 goto error; 846 goto error;
847 } 847 }
848 848
849 if (ruid != (uid_t) -1) { 849 if (ruid != (uid_t) -1) {
850 new->uid = kruid; 850 new->uid = kruid;
851 if (!uid_eq(kruid, old->uid)) { 851 if (!uid_eq(kruid, old->uid)) {
852 retval = set_user(new); 852 retval = set_user(new);
853 if (retval < 0) 853 if (retval < 0)
854 goto error; 854 goto error;
855 } 855 }
856 } 856 }
857 if (euid != (uid_t) -1) 857 if (euid != (uid_t) -1)
858 new->euid = keuid; 858 new->euid = keuid;
859 if (suid != (uid_t) -1) 859 if (suid != (uid_t) -1)
860 new->suid = ksuid; 860 new->suid = ksuid;
861 new->fsuid = new->euid; 861 new->fsuid = new->euid;
862 862
863 retval = security_task_fix_setuid(new, old, LSM_SETID_RES); 863 retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
864 if (retval < 0) 864 if (retval < 0)
865 goto error; 865 goto error;
866 866
867 return commit_creds(new); 867 return commit_creds(new);
868 868
869 error: 869 error:
870 abort_creds(new); 870 abort_creds(new);
871 return retval; 871 return retval;
872 } 872 }
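
The comment before setresuid() notes that it is general enough to build a 4.4BSD-compatible seteuid() on top of it. A hedged userspace sketch of that mapping (glibc's real wrappers may differ in detail): pass -1 for every id that should stay untouched.

#define _GNU_SOURCE
#include <sys/types.h>
#include <unistd.h>

static int my_seteuid(uid_t euid)
{
    return setresuid((uid_t)-1, euid, (uid_t)-1);   /* change euid only */
}

static int my_setegid(gid_t egid)
{
    return setresgid((gid_t)-1, egid, (gid_t)-1);   /* change egid only */
}

int main(void)
{
    /* Drop only the effective ids back to the real ones. */
    if (my_seteuid(getuid()) != 0 || my_setegid(getgid()) != 0)
        return 1;
    return 0;
}
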
873 873
874 SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) 874 SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
875 { 875 {
876 const struct cred *cred = current_cred(); 876 const struct cred *cred = current_cred();
877 int retval; 877 int retval;
878 uid_t ruid, euid, suid; 878 uid_t ruid, euid, suid;
879 879
880 ruid = from_kuid_munged(cred->user_ns, cred->uid); 880 ruid = from_kuid_munged(cred->user_ns, cred->uid);
881 euid = from_kuid_munged(cred->user_ns, cred->euid); 881 euid = from_kuid_munged(cred->user_ns, cred->euid);
882 suid = from_kuid_munged(cred->user_ns, cred->suid); 882 suid = from_kuid_munged(cred->user_ns, cred->suid);
883 883
884 if (!(retval = put_user(ruid, ruidp)) && 884 if (!(retval = put_user(ruid, ruidp)) &&
885 !(retval = put_user(euid, euidp))) 885 !(retval = put_user(euid, euidp)))
886 retval = put_user(suid, suidp); 886 retval = put_user(suid, suidp);
887 887
888 return retval; 888 return retval;
889 } 889 }
890 890
891 /* 891 /*
892 * Same as above, but for rgid, egid, sgid. 892 * Same as above, but for rgid, egid, sgid.
893 */ 893 */
894 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) 894 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
895 { 895 {
896 struct user_namespace *ns = current_user_ns(); 896 struct user_namespace *ns = current_user_ns();
897 const struct cred *old; 897 const struct cred *old;
898 struct cred *new; 898 struct cred *new;
899 int retval; 899 int retval;
900 kgid_t krgid, kegid, ksgid; 900 kgid_t krgid, kegid, ksgid;
901 901
902 krgid = make_kgid(ns, rgid); 902 krgid = make_kgid(ns, rgid);
903 kegid = make_kgid(ns, egid); 903 kegid = make_kgid(ns, egid);
904 ksgid = make_kgid(ns, sgid); 904 ksgid = make_kgid(ns, sgid);
905 905
906 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 906 if ((rgid != (gid_t) -1) && !gid_valid(krgid))
907 return -EINVAL; 907 return -EINVAL;
908 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 908 if ((egid != (gid_t) -1) && !gid_valid(kegid))
909 return -EINVAL; 909 return -EINVAL;
910 if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) 910 if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
911 return -EINVAL; 911 return -EINVAL;
912 912
913 new = prepare_creds(); 913 new = prepare_creds();
914 if (!new) 914 if (!new)
915 return -ENOMEM; 915 return -ENOMEM;
916 old = current_cred(); 916 old = current_cred();
917 917
918 retval = -EPERM; 918 retval = -EPERM;
919 if (!nsown_capable(CAP_SETGID)) { 919 if (!nsown_capable(CAP_SETGID)) {
920 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && 920 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
921 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) 921 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
922 goto error; 922 goto error;
923 if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && 923 if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
924 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) 924 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
925 goto error; 925 goto error;
926 if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && 926 if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
927 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) 927 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
928 goto error; 928 goto error;
929 } 929 }
930 930
931 if (rgid != (gid_t) -1) 931 if (rgid != (gid_t) -1)
932 new->gid = krgid; 932 new->gid = krgid;
933 if (egid != (gid_t) -1) 933 if (egid != (gid_t) -1)
934 new->egid = kegid; 934 new->egid = kegid;
935 if (sgid != (gid_t) -1) 935 if (sgid != (gid_t) -1)
936 new->sgid = ksgid; 936 new->sgid = ksgid;
937 new->fsgid = new->egid; 937 new->fsgid = new->egid;
938 938
939 return commit_creds(new); 939 return commit_creds(new);
940 940
941 error: 941 error:
942 abort_creds(new); 942 abort_creds(new);
943 return retval; 943 return retval;
944 } 944 }
945 945
946 SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) 946 SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
947 { 947 {
948 const struct cred *cred = current_cred(); 948 const struct cred *cred = current_cred();
949 int retval; 949 int retval;
950 gid_t rgid, egid, sgid; 950 gid_t rgid, egid, sgid;
951 951
952 rgid = from_kgid_munged(cred->user_ns, cred->gid); 952 rgid = from_kgid_munged(cred->user_ns, cred->gid);
953 egid = from_kgid_munged(cred->user_ns, cred->egid); 953 egid = from_kgid_munged(cred->user_ns, cred->egid);
954 sgid = from_kgid_munged(cred->user_ns, cred->sgid); 954 sgid = from_kgid_munged(cred->user_ns, cred->sgid);
955 955
956 if (!(retval = put_user(rgid, rgidp)) && 956 if (!(retval = put_user(rgid, rgidp)) &&
957 !(retval = put_user(egid, egidp))) 957 !(retval = put_user(egid, egidp)))
958 retval = put_user(sgid, sgidp); 958 retval = put_user(sgid, sgidp);
959 959
960 return retval; 960 return retval;
961 } 961 }
962 962
963 963
964 /* 964 /*
965 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 965 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
966 * is used for "access()" and for the NFS daemon (letting nfsd stay at 966 * is used for "access()" and for the NFS daemon (letting nfsd stay at
967 * whatever uid it wants to). It normally shadows "euid", except when 967 * whatever uid it wants to). It normally shadows "euid", except when
968 * explicitly set by setfsuid() or for access.. 968 * explicitly set by setfsuid() or for access..
969 */ 969 */
970 SYSCALL_DEFINE1(setfsuid, uid_t, uid) 970 SYSCALL_DEFINE1(setfsuid, uid_t, uid)
971 { 971 {
972 const struct cred *old; 972 const struct cred *old;
973 struct cred *new; 973 struct cred *new;
974 uid_t old_fsuid; 974 uid_t old_fsuid;
975 kuid_t kuid; 975 kuid_t kuid;
976 976
977 old = current_cred(); 977 old = current_cred();
978 old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); 978 old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);
979 979
980 kuid = make_kuid(old->user_ns, uid); 980 kuid = make_kuid(old->user_ns, uid);
981 if (!uid_valid(kuid)) 981 if (!uid_valid(kuid))
982 return old_fsuid; 982 return old_fsuid;
983 983
984 new = prepare_creds(); 984 new = prepare_creds();
985 if (!new) 985 if (!new)
986 return old_fsuid; 986 return old_fsuid;
987 987
988 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || 988 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
989 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || 989 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
990 nsown_capable(CAP_SETUID)) { 990 nsown_capable(CAP_SETUID)) {
991 if (!uid_eq(kuid, old->fsuid)) { 991 if (!uid_eq(kuid, old->fsuid)) {
992 new->fsuid = kuid; 992 new->fsuid = kuid;
993 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 993 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
994 goto change_okay; 994 goto change_okay;
995 } 995 }
996 } 996 }
997 997
998 abort_creds(new); 998 abort_creds(new);
999 return old_fsuid; 999 return old_fsuid;
1000 1000
1001 change_okay: 1001 change_okay:
1002 commit_creds(new); 1002 commit_creds(new);
1003 return old_fsuid; 1003 return old_fsuid;
1004 } 1004 }
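
A hedged sketch of the nfsd-style pattern the setfsuid() comment alludes to: a server task switches only its filesystem uid around an access check, leaving euid (and thus signalling rules) untouched. The path and uid below are made up for illustration; note that setfsuid() returns the previous fsuid rather than an error code.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/fsuid.h>
#include <unistd.h>

/* Open a file "as" the client user for permission purposes only. */
static int open_as_user(const char *path, uid_t client_uid)
{
    int saved = setfsuid(client_uid);   /* switch fsuid for this task */
    int fd = open(path, O_RDONLY);      /* permission check uses fsuid */

    setfsuid(saved);                    /* always restore */
    return fd;
}

int main(void)
{
    int fd = open_as_user("/tmp/example.txt", 1000);  /* hypothetical path/uid */

    if (fd >= 0)
        close(fd);
    else
        perror("open_as_user");
    return 0;
}
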
1005 1005
1006 /* 1006 /*
1007 * Samma på svenska.. 1007 * Samma på svenska..
1008 */ 1008 */
1009 SYSCALL_DEFINE1(setfsgid, gid_t, gid) 1009 SYSCALL_DEFINE1(setfsgid, gid_t, gid)
1010 { 1010 {
1011 const struct cred *old; 1011 const struct cred *old;
1012 struct cred *new; 1012 struct cred *new;
1013 gid_t old_fsgid; 1013 gid_t old_fsgid;
1014 kgid_t kgid; 1014 kgid_t kgid;
1015 1015
1016 old = current_cred(); 1016 old = current_cred();
1017 old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); 1017 old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);
1018 1018
1019 kgid = make_kgid(old->user_ns, gid); 1019 kgid = make_kgid(old->user_ns, gid);
1020 if (!gid_valid(kgid)) 1020 if (!gid_valid(kgid))
1021 return old_fsgid; 1021 return old_fsgid;
1022 1022
1023 new = prepare_creds(); 1023 new = prepare_creds();
1024 if (!new) 1024 if (!new)
1025 return old_fsgid; 1025 return old_fsgid;
1026 1026
1027 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || 1027 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
1028 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || 1028 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
1029 nsown_capable(CAP_SETGID)) { 1029 nsown_capable(CAP_SETGID)) {
1030 if (!gid_eq(kgid, old->fsgid)) { 1030 if (!gid_eq(kgid, old->fsgid)) {
1031 new->fsgid = kgid; 1031 new->fsgid = kgid;
1032 goto change_okay; 1032 goto change_okay;
1033 } 1033 }
1034 } 1034 }
1035 1035
1036 abort_creds(new); 1036 abort_creds(new);
1037 return old_fsgid; 1037 return old_fsgid;
1038 1038
1039 change_okay: 1039 change_okay:
1040 commit_creds(new); 1040 commit_creds(new);
1041 return old_fsgid; 1041 return old_fsgid;
1042 } 1042 }
1043 1043
1044 void do_sys_times(struct tms *tms) 1044 void do_sys_times(struct tms *tms)
1045 { 1045 {
1046 cputime_t tgutime, tgstime, cutime, cstime; 1046 cputime_t tgutime, tgstime, cutime, cstime;
1047 1047
1048 spin_lock_irq(&current->sighand->siglock); 1048 spin_lock_irq(&current->sighand->siglock);
1049 thread_group_times(current, &tgutime, &tgstime); 1049 thread_group_cputime_adjusted(current, &tgutime, &tgstime);
1050 cutime = current->signal->cutime; 1050 cutime = current->signal->cutime;
1051 cstime = current->signal->cstime; 1051 cstime = current->signal->cstime;
1052 spin_unlock_irq(&current->sighand->siglock); 1052 spin_unlock_irq(&current->sighand->siglock);
1053 tms->tms_utime = cputime_to_clock_t(tgutime); 1053 tms->tms_utime = cputime_to_clock_t(tgutime);
1054 tms->tms_stime = cputime_to_clock_t(tgstime); 1054 tms->tms_stime = cputime_to_clock_t(tgstime);
1055 tms->tms_cutime = cputime_to_clock_t(cutime); 1055 tms->tms_cutime = cputime_to_clock_t(cutime);
1056 tms->tms_cstime = cputime_to_clock_t(cstime); 1056 tms->tms_cstime = cputime_to_clock_t(cstime);
1057 } 1057 }
1058 1058
1059 SYSCALL_DEFINE1(times, struct tms __user *, tbuf) 1059 SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
1060 { 1060 {
1061 if (tbuf) { 1061 if (tbuf) {
1062 struct tms tmp; 1062 struct tms tmp;
1063 1063
1064 do_sys_times(&tmp); 1064 do_sys_times(&tmp);
1065 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 1065 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
1066 return -EFAULT; 1066 return -EFAULT;
1067 } 1067 }
1068 force_successful_syscall_return(); 1068 force_successful_syscall_return();
1069 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 1069 return (long) jiffies_64_to_clock_t(get_jiffies_64());
1070 } 1070 }
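
For reference, a small userspace sketch of how the tms values filled in by do_sys_times() are typically consumed; the busy loop is only an illustrative workload.

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
    struct tms t;
    long hz = sysconf(_SC_CLK_TCK);     /* clock ticks per second */
    volatile double x = 0;

    for (long i = 0; i < 50 * 1000 * 1000; i++)   /* burn some CPU */
        x += i;

    clock_t elapsed = times(&t);        /* wall-clock ticks since an arbitrary origin */
    printf("user=%.2fs sys=%.2fs (elapsed ticks: %ld)\n",
           (double)t.tms_utime / hz, (double)t.tms_stime / hz, (long)elapsed);
    return 0;
}
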
1071 1071
1072 /* 1072 /*
1073 * This needs some heavy checking ... 1073 * This needs some heavy checking ...
1074 * I just haven't the stomach for it. I also don't fully 1074 * I just haven't the stomach for it. I also don't fully
1075 * understand sessions/pgrp etc. Let somebody who does explain it. 1075 * understand sessions/pgrp etc. Let somebody who does explain it.
1076 * 1076 *
1077 * OK, I think I have the protection semantics right.... this is really 1077 * OK, I think I have the protection semantics right.... this is really
1078 * only important on a multi-user system anyway, to make sure one user 1078 * only important on a multi-user system anyway, to make sure one user
1079 * can't send a signal to a process owned by another. -TYT, 12/12/91 1079 * can't send a signal to a process owned by another. -TYT, 12/12/91
1080 * 1080 *
1081 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 1081 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
1082 * LBT 04.03.94 1082 * LBT 04.03.94
1083 */ 1083 */
1084 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) 1084 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
1085 { 1085 {
1086 struct task_struct *p; 1086 struct task_struct *p;
1087 struct task_struct *group_leader = current->group_leader; 1087 struct task_struct *group_leader = current->group_leader;
1088 struct pid *pgrp; 1088 struct pid *pgrp;
1089 int err; 1089 int err;
1090 1090
1091 if (!pid) 1091 if (!pid)
1092 pid = task_pid_vnr(group_leader); 1092 pid = task_pid_vnr(group_leader);
1093 if (!pgid) 1093 if (!pgid)
1094 pgid = pid; 1094 pgid = pid;
1095 if (pgid < 0) 1095 if (pgid < 0)
1096 return -EINVAL; 1096 return -EINVAL;
1097 rcu_read_lock(); 1097 rcu_read_lock();
1098 1098
1099 /* From this point forward we keep holding onto the tasklist lock 1099 /* From this point forward we keep holding onto the tasklist lock
1100 * so that our parent does not change from under us. -DaveM 1100 * so that our parent does not change from under us. -DaveM
1101 */ 1101 */
1102 write_lock_irq(&tasklist_lock); 1102 write_lock_irq(&tasklist_lock);
1103 1103
1104 err = -ESRCH; 1104 err = -ESRCH;
1105 p = find_task_by_vpid(pid); 1105 p = find_task_by_vpid(pid);
1106 if (!p) 1106 if (!p)
1107 goto out; 1107 goto out;
1108 1108
1109 err = -EINVAL; 1109 err = -EINVAL;
1110 if (!thread_group_leader(p)) 1110 if (!thread_group_leader(p))
1111 goto out; 1111 goto out;
1112 1112
1113 if (same_thread_group(p->real_parent, group_leader)) { 1113 if (same_thread_group(p->real_parent, group_leader)) {
1114 err = -EPERM; 1114 err = -EPERM;
1115 if (task_session(p) != task_session(group_leader)) 1115 if (task_session(p) != task_session(group_leader))
1116 goto out; 1116 goto out;
1117 err = -EACCES; 1117 err = -EACCES;
1118 if (p->did_exec) 1118 if (p->did_exec)
1119 goto out; 1119 goto out;
1120 } else { 1120 } else {
1121 err = -ESRCH; 1121 err = -ESRCH;
1122 if (p != group_leader) 1122 if (p != group_leader)
1123 goto out; 1123 goto out;
1124 } 1124 }
1125 1125
1126 err = -EPERM; 1126 err = -EPERM;
1127 if (p->signal->leader) 1127 if (p->signal->leader)
1128 goto out; 1128 goto out;
1129 1129
1130 pgrp = task_pid(p); 1130 pgrp = task_pid(p);
1131 if (pgid != pid) { 1131 if (pgid != pid) {
1132 struct task_struct *g; 1132 struct task_struct *g;
1133 1133
1134 pgrp = find_vpid(pgid); 1134 pgrp = find_vpid(pgid);
1135 g = pid_task(pgrp, PIDTYPE_PGID); 1135 g = pid_task(pgrp, PIDTYPE_PGID);
1136 if (!g || task_session(g) != task_session(group_leader)) 1136 if (!g || task_session(g) != task_session(group_leader))
1137 goto out; 1137 goto out;
1138 } 1138 }
1139 1139
1140 err = security_task_setpgid(p, pgid); 1140 err = security_task_setpgid(p, pgid);
1141 if (err) 1141 if (err)
1142 goto out; 1142 goto out;
1143 1143
1144 if (task_pgrp(p) != pgrp) 1144 if (task_pgrp(p) != pgrp)
1145 change_pid(p, PIDTYPE_PGID, pgrp); 1145 change_pid(p, PIDTYPE_PGID, pgrp);
1146 1146
1147 err = 0; 1147 err = 0;
1148 out: 1148 out:
1149 /* All paths lead to here, thus we are safe. -DaveM */ 1149 /* All paths lead to here, thus we are safe. -DaveM */
1150 write_unlock_irq(&tasklist_lock); 1150 write_unlock_irq(&tasklist_lock);
1151 rcu_read_unlock(); 1151 rcu_read_unlock();
1152 return err; 1152 return err;
1153 } 1153 }
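
To make the pgid rules above concrete, a userspace sketch of the usual shell job-control pattern (a sketch only; real shells also manage the controlling terminal). Both parent and child call setpgid() so the group exists whichever runs first; an EACCES in the parent after the child has exec'd is harmless, which is exactly the did_exec case checked above.

#include <errno.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    pid_t child = fork();

    if (child == 0) {
        setpgid(0, 0);            /* child: new group, pgid == its own pid */
        execlp("sleep", "sleep", "1", (char *)NULL);
        _exit(127);
    }
    if (setpgid(child, child) != 0 && errno != EACCES)
        perror("setpgid");
    printf("child %d in process group %d\n", (int)child, (int)getpgid(child));
    waitpid(child, NULL, 0);
    return 0;
}
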
1154 1154
1155 SYSCALL_DEFINE1(getpgid, pid_t, pid) 1155 SYSCALL_DEFINE1(getpgid, pid_t, pid)
1156 { 1156 {
1157 struct task_struct *p; 1157 struct task_struct *p;
1158 struct pid *grp; 1158 struct pid *grp;
1159 int retval; 1159 int retval;
1160 1160
1161 rcu_read_lock(); 1161 rcu_read_lock();
1162 if (!pid) 1162 if (!pid)
1163 grp = task_pgrp(current); 1163 grp = task_pgrp(current);
1164 else { 1164 else {
1165 retval = -ESRCH; 1165 retval = -ESRCH;
1166 p = find_task_by_vpid(pid); 1166 p = find_task_by_vpid(pid);
1167 if (!p) 1167 if (!p)
1168 goto out; 1168 goto out;
1169 grp = task_pgrp(p); 1169 grp = task_pgrp(p);
1170 if (!grp) 1170 if (!grp)
1171 goto out; 1171 goto out;
1172 1172
1173 retval = security_task_getpgid(p); 1173 retval = security_task_getpgid(p);
1174 if (retval) 1174 if (retval)
1175 goto out; 1175 goto out;
1176 } 1176 }
1177 retval = pid_vnr(grp); 1177 retval = pid_vnr(grp);
1178 out: 1178 out:
1179 rcu_read_unlock(); 1179 rcu_read_unlock();
1180 return retval; 1180 return retval;
1181 } 1181 }
1182 1182
1183 #ifdef __ARCH_WANT_SYS_GETPGRP 1183 #ifdef __ARCH_WANT_SYS_GETPGRP
1184 1184
1185 SYSCALL_DEFINE0(getpgrp) 1185 SYSCALL_DEFINE0(getpgrp)
1186 { 1186 {
1187 return sys_getpgid(0); 1187 return sys_getpgid(0);
1188 } 1188 }
1189 1189
1190 #endif 1190 #endif
1191 1191
1192 SYSCALL_DEFINE1(getsid, pid_t, pid) 1192 SYSCALL_DEFINE1(getsid, pid_t, pid)
1193 { 1193 {
1194 struct task_struct *p; 1194 struct task_struct *p;
1195 struct pid *sid; 1195 struct pid *sid;
1196 int retval; 1196 int retval;
1197 1197
1198 rcu_read_lock(); 1198 rcu_read_lock();
1199 if (!pid) 1199 if (!pid)
1200 sid = task_session(current); 1200 sid = task_session(current);
1201 else { 1201 else {
1202 retval = -ESRCH; 1202 retval = -ESRCH;
1203 p = find_task_by_vpid(pid); 1203 p = find_task_by_vpid(pid);
1204 if (!p) 1204 if (!p)
1205 goto out; 1205 goto out;
1206 sid = task_session(p); 1206 sid = task_session(p);
1207 if (!sid) 1207 if (!sid)
1208 goto out; 1208 goto out;
1209 1209
1210 retval = security_task_getsid(p); 1210 retval = security_task_getsid(p);
1211 if (retval) 1211 if (retval)
1212 goto out; 1212 goto out;
1213 } 1213 }
1214 retval = pid_vnr(sid); 1214 retval = pid_vnr(sid);
1215 out: 1215 out:
1216 rcu_read_unlock(); 1216 rcu_read_unlock();
1217 return retval; 1217 return retval;
1218 } 1218 }
1219 1219
1220 SYSCALL_DEFINE0(setsid) 1220 SYSCALL_DEFINE0(setsid)
1221 { 1221 {
1222 struct task_struct *group_leader = current->group_leader; 1222 struct task_struct *group_leader = current->group_leader;
1223 struct pid *sid = task_pid(group_leader); 1223 struct pid *sid = task_pid(group_leader);
1224 pid_t session = pid_vnr(sid); 1224 pid_t session = pid_vnr(sid);
1225 int err = -EPERM; 1225 int err = -EPERM;
1226 1226
1227 write_lock_irq(&tasklist_lock); 1227 write_lock_irq(&tasklist_lock);
1228 /* Fail if I am already a session leader */ 1228 /* Fail if I am already a session leader */
1229 if (group_leader->signal->leader) 1229 if (group_leader->signal->leader)
1230 goto out; 1230 goto out;
1231 1231
1232 /* Fail if a process group id already exists that equals the 1232 /* Fail if a process group id already exists that equals the
1233 * proposed session id. 1233 * proposed session id.
1234 */ 1234 */
1235 if (pid_task(sid, PIDTYPE_PGID)) 1235 if (pid_task(sid, PIDTYPE_PGID))
1236 goto out; 1236 goto out;
1237 1237
1238 group_leader->signal->leader = 1; 1238 group_leader->signal->leader = 1;
1239 __set_special_pids(sid); 1239 __set_special_pids(sid);
1240 1240
1241 proc_clear_tty(group_leader); 1241 proc_clear_tty(group_leader);
1242 1242
1243 err = session; 1243 err = session;
1244 out: 1244 out:
1245 write_unlock_irq(&tasklist_lock); 1245 write_unlock_irq(&tasklist_lock);
1246 if (err > 0) { 1246 if (err > 0) {
1247 proc_sid_connector(group_leader); 1247 proc_sid_connector(group_leader);
1248 sched_autogroup_create_attach(group_leader); 1248 sched_autogroup_create_attach(group_leader);
1249 } 1249 }
1250 return err; 1250 return err;
1251 } 1251 }
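
A minimal daemonization sketch showing why setsid() is normally called from a forked child: a process launched as a shell job is typically already a process-group leader, so its own setsid() would fail with the -EPERM check above.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
    pid_t pid = fork();

    if (pid < 0)
        exit(EXIT_FAILURE);
    if (pid > 0)
        exit(EXIT_SUCCESS);       /* parent exits; child is not a group leader */

    if (setsid() < 0) {           /* child becomes session + group leader, no tty */
        perror("setsid");
        exit(EXIT_FAILURE);
    }
    /* ... continue daemon setup: chdir("/"), close fds, second fork, etc. ... */
    return 0;
}
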
1252 1252
1253 DECLARE_RWSEM(uts_sem); 1253 DECLARE_RWSEM(uts_sem);
1254 1254
1255 #ifdef COMPAT_UTS_MACHINE 1255 #ifdef COMPAT_UTS_MACHINE
1256 #define override_architecture(name) \ 1256 #define override_architecture(name) \
1257 (personality(current->personality) == PER_LINUX32 && \ 1257 (personality(current->personality) == PER_LINUX32 && \
1258 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ 1258 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
1259 sizeof(COMPAT_UTS_MACHINE))) 1259 sizeof(COMPAT_UTS_MACHINE)))
1260 #else 1260 #else
1261 #define override_architecture(name) 0 1261 #define override_architecture(name) 0
1262 #endif 1262 #endif
1263 1263
1264 /* 1264 /*
1265 * Work around broken programs that cannot handle "Linux 3.0". 1265 * Work around broken programs that cannot handle "Linux 3.0".
1266 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 1266 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
1267 */ 1267 */
1268 static int override_release(char __user *release, size_t len) 1268 static int override_release(char __user *release, size_t len)
1269 { 1269 {
1270 int ret = 0; 1270 int ret = 0;
1271 1271
1272 if (current->personality & UNAME26) { 1272 if (current->personality & UNAME26) {
1273 const char *rest = UTS_RELEASE; 1273 const char *rest = UTS_RELEASE;
1274 char buf[65] = { 0 }; 1274 char buf[65] = { 0 };
1275 int ndots = 0; 1275 int ndots = 0;
1276 unsigned v; 1276 unsigned v;
1277 size_t copy; 1277 size_t copy;
1278 1278
1279 while (*rest) { 1279 while (*rest) {
1280 if (*rest == '.' && ++ndots >= 3) 1280 if (*rest == '.' && ++ndots >= 3)
1281 break; 1281 break;
1282 if (!isdigit(*rest) && *rest != '.') 1282 if (!isdigit(*rest) && *rest != '.')
1283 break; 1283 break;
1284 rest++; 1284 rest++;
1285 } 1285 }
1286 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; 1286 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
1287 copy = clamp_t(size_t, len, 1, sizeof(buf)); 1287 copy = clamp_t(size_t, len, 1, sizeof(buf));
1288 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); 1288 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
1289 ret = copy_to_user(release, buf, copy + 1); 1289 ret = copy_to_user(release, buf, copy + 1);
1290 } 1290 }
1291 return ret; 1291 return ret;
1292 } 1292 }
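
A worked example of the mapping performed by override_release() (the FAKE_* constants below are stand-ins for the kernel's build-time LINUX_VERSION_CODE and UTS_RELEASE, assumed here to describe a 3.7.1 kernel): the minor number 7 becomes 40 + 7 = 47, and everything after the digit/dot prefix is appended unchanged.

#include <stdio.h>

#define FAKE_LINUX_VERSION_CODE ((3 << 16) + (7 << 8) + 1)   /* 3.7.1 */
#define FAKE_UTS_RELEASE "3.7.1-extra"

int main(void)
{
    unsigned v = ((FAKE_LINUX_VERSION_CODE >> 8) & 0xff) + 40;
    const char *rest = "-extra";   /* what the digit/dot scan leaves over */

    printf("2.6.%u%s\n", v, rest); /* prints "2.6.47-extra" */
    return 0;
}
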
1293 1293
1294 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1294 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1295 { 1295 {
1296 int errno = 0; 1296 int errno = 0;
1297 1297
1298 down_read(&uts_sem); 1298 down_read(&uts_sem);
1299 if (copy_to_user(name, utsname(), sizeof *name)) 1299 if (copy_to_user(name, utsname(), sizeof *name))
1300 errno = -EFAULT; 1300 errno = -EFAULT;
1301 up_read(&uts_sem); 1301 up_read(&uts_sem);
1302 1302
1303 if (!errno && override_release(name->release, sizeof(name->release))) 1303 if (!errno && override_release(name->release, sizeof(name->release)))
1304 errno = -EFAULT; 1304 errno = -EFAULT;
1305 if (!errno && override_architecture(name)) 1305 if (!errno && override_architecture(name))
1306 errno = -EFAULT; 1306 errno = -EFAULT;
1307 return errno; 1307 return errno;
1308 } 1308 }
1309 1309
1310 #ifdef __ARCH_WANT_SYS_OLD_UNAME 1310 #ifdef __ARCH_WANT_SYS_OLD_UNAME
1311 /* 1311 /*
1312 * Old cruft 1312 * Old cruft
1313 */ 1313 */
1314 SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) 1314 SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
1315 { 1315 {
1316 int error = 0; 1316 int error = 0;
1317 1317
1318 if (!name) 1318 if (!name)
1319 return -EFAULT; 1319 return -EFAULT;
1320 1320
1321 down_read(&uts_sem); 1321 down_read(&uts_sem);
1322 if (copy_to_user(name, utsname(), sizeof(*name))) 1322 if (copy_to_user(name, utsname(), sizeof(*name)))
1323 error = -EFAULT; 1323 error = -EFAULT;
1324 up_read(&uts_sem); 1324 up_read(&uts_sem);
1325 1325
1326 if (!error && override_release(name->release, sizeof(name->release))) 1326 if (!error && override_release(name->release, sizeof(name->release)))
1327 error = -EFAULT; 1327 error = -EFAULT;
1328 if (!error && override_architecture(name)) 1328 if (!error && override_architecture(name))
1329 error = -EFAULT; 1329 error = -EFAULT;
1330 return error; 1330 return error;
1331 } 1331 }
1332 1332
1333 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) 1333 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
1334 { 1334 {
1335 int error; 1335 int error;
1336 1336
1337 if (!name) 1337 if (!name)
1338 return -EFAULT; 1338 return -EFAULT;
1339 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) 1339 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1340 return -EFAULT; 1340 return -EFAULT;
1341 1341
1342 down_read(&uts_sem); 1342 down_read(&uts_sem);
1343 error = __copy_to_user(&name->sysname, &utsname()->sysname, 1343 error = __copy_to_user(&name->sysname, &utsname()->sysname,
1344 __OLD_UTS_LEN); 1344 __OLD_UTS_LEN);
1345 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 1345 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1346 error |= __copy_to_user(&name->nodename, &utsname()->nodename, 1346 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
1347 __OLD_UTS_LEN); 1347 __OLD_UTS_LEN);
1348 error |= __put_user(0, name->nodename + __OLD_UTS_LEN); 1348 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1349 error |= __copy_to_user(&name->release, &utsname()->release, 1349 error |= __copy_to_user(&name->release, &utsname()->release,
1350 __OLD_UTS_LEN); 1350 __OLD_UTS_LEN);
1351 error |= __put_user(0, name->release + __OLD_UTS_LEN); 1351 error |= __put_user(0, name->release + __OLD_UTS_LEN);
1352 error |= __copy_to_user(&name->version, &utsname()->version, 1352 error |= __copy_to_user(&name->version, &utsname()->version,
1353 __OLD_UTS_LEN); 1353 __OLD_UTS_LEN);
1354 error |= __put_user(0, name->version + __OLD_UTS_LEN); 1354 error |= __put_user(0, name->version + __OLD_UTS_LEN);
1355 error |= __copy_to_user(&name->machine, &utsname()->machine, 1355 error |= __copy_to_user(&name->machine, &utsname()->machine,
1356 __OLD_UTS_LEN); 1356 __OLD_UTS_LEN);
1357 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 1357 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
1358 up_read(&uts_sem); 1358 up_read(&uts_sem);
1359 1359
1360 if (!error && override_architecture(name)) 1360 if (!error && override_architecture(name))
1361 error = -EFAULT; 1361 error = -EFAULT;
1362 if (!error && override_release(name->release, sizeof(name->release))) 1362 if (!error && override_release(name->release, sizeof(name->release)))
1363 error = -EFAULT; 1363 error = -EFAULT;
1364 return error ? -EFAULT : 0; 1364 return error ? -EFAULT : 0;
1365 } 1365 }
1366 #endif 1366 #endif
1367 1367
1368 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1368 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1369 { 1369 {
1370 int errno; 1370 int errno;
1371 char tmp[__NEW_UTS_LEN]; 1371 char tmp[__NEW_UTS_LEN];
1372 1372
1373 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1373 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1374 return -EPERM; 1374 return -EPERM;
1375 1375
1376 if (len < 0 || len > __NEW_UTS_LEN) 1376 if (len < 0 || len > __NEW_UTS_LEN)
1377 return -EINVAL; 1377 return -EINVAL;
1378 down_write(&uts_sem); 1378 down_write(&uts_sem);
1379 errno = -EFAULT; 1379 errno = -EFAULT;
1380 if (!copy_from_user(tmp, name, len)) { 1380 if (!copy_from_user(tmp, name, len)) {
1381 struct new_utsname *u = utsname(); 1381 struct new_utsname *u = utsname();
1382 1382
1383 memcpy(u->nodename, tmp, len); 1383 memcpy(u->nodename, tmp, len);
1384 memset(u->nodename + len, 0, sizeof(u->nodename) - len); 1384 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
1385 errno = 0; 1385 errno = 0;
1386 uts_proc_notify(UTS_PROC_HOSTNAME); 1386 uts_proc_notify(UTS_PROC_HOSTNAME);
1387 } 1387 }
1388 up_write(&uts_sem); 1388 up_write(&uts_sem);
1389 return errno; 1389 return errno;
1390 } 1390 }
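
Userspace sketch of the call implemented above; it needs CAP_SYS_ADMIN in the UTS namespace's owning user namespace, so it is typically run as root (the hostname string is an arbitrary example).

#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    const char *name = "buildbox-01";          /* example hostname */

    if (sethostname(name, strlen(name)) != 0)  /* len must not exceed __NEW_UTS_LEN */
        perror("sethostname");                 /* EPERM without CAP_SYS_ADMIN */
    return 0;
}
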
1391 1391
1392 #ifdef __ARCH_WANT_SYS_GETHOSTNAME 1392 #ifdef __ARCH_WANT_SYS_GETHOSTNAME
1393 1393
1394 SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) 1394 SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
1395 { 1395 {
1396 int i, errno; 1396 int i, errno;
1397 struct new_utsname *u; 1397 struct new_utsname *u;
1398 1398
1399 if (len < 0) 1399 if (len < 0)
1400 return -EINVAL; 1400 return -EINVAL;
1401 down_read(&uts_sem); 1401 down_read(&uts_sem);
1402 u = utsname(); 1402 u = utsname();
1403 i = 1 + strlen(u->nodename); 1403 i = 1 + strlen(u->nodename);
1404 if (i > len) 1404 if (i > len)
1405 i = len; 1405 i = len;
1406 errno = 0; 1406 errno = 0;
1407 if (copy_to_user(name, u->nodename, i)) 1407 if (copy_to_user(name, u->nodename, i))
1408 errno = -EFAULT; 1408 errno = -EFAULT;
1409 up_read(&uts_sem); 1409 up_read(&uts_sem);
1410 return errno; 1410 return errno;
1411 } 1411 }
1412 1412
1413 #endif 1413 #endif
1414 1414
1415 /* 1415 /*
1416 * Only setdomainname; getdomainname can be implemented by calling 1416 * Only setdomainname; getdomainname can be implemented by calling
1417 * uname() 1417 * uname()
1418 */ 1418 */
1419 SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) 1419 SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1420 { 1420 {
1421 int errno; 1421 int errno;
1422 char tmp[__NEW_UTS_LEN]; 1422 char tmp[__NEW_UTS_LEN];
1423 1423
1424 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1424 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1425 return -EPERM; 1425 return -EPERM;
1426 if (len < 0 || len > __NEW_UTS_LEN) 1426 if (len < 0 || len > __NEW_UTS_LEN)
1427 return -EINVAL; 1427 return -EINVAL;
1428 1428
1429 down_write(&uts_sem); 1429 down_write(&uts_sem);
1430 errno = -EFAULT; 1430 errno = -EFAULT;
1431 if (!copy_from_user(tmp, name, len)) { 1431 if (!copy_from_user(tmp, name, len)) {
1432 struct new_utsname *u = utsname(); 1432 struct new_utsname *u = utsname();
1433 1433
1434 memcpy(u->domainname, tmp, len); 1434 memcpy(u->domainname, tmp, len);
1435 memset(u->domainname + len, 0, sizeof(u->domainname) - len); 1435 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
1436 errno = 0; 1436 errno = 0;
1437 uts_proc_notify(UTS_PROC_DOMAINNAME); 1437 uts_proc_notify(UTS_PROC_DOMAINNAME);
1438 } 1438 }
1439 up_write(&uts_sem); 1439 up_write(&uts_sem);
1440 return errno; 1440 return errno;
1441 } 1441 }
1442 1442
1443 SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1443 SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1444 { 1444 {
1445 struct rlimit value; 1445 struct rlimit value;
1446 int ret; 1446 int ret;
1447 1447
1448 ret = do_prlimit(current, resource, NULL, &value); 1448 ret = do_prlimit(current, resource, NULL, &value);
1449 if (!ret) 1449 if (!ret)
1450 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; 1450 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1451 1451
1452 return ret; 1452 return ret;
1453 } 1453 }
1454 1454
1455 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1455 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1456 1456
1457 /* 1457 /*
1458 * Back compatibility for getrlimit. Needed for some apps. 1458 * Back compatibility for getrlimit. Needed for some apps.
1459 */ 1459 */
1460 1460
1461 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, 1461 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
1462 struct rlimit __user *, rlim) 1462 struct rlimit __user *, rlim)
1463 { 1463 {
1464 struct rlimit x; 1464 struct rlimit x;
1465 if (resource >= RLIM_NLIMITS) 1465 if (resource >= RLIM_NLIMITS)
1466 return -EINVAL; 1466 return -EINVAL;
1467 1467
1468 task_lock(current->group_leader); 1468 task_lock(current->group_leader);
1469 x = current->signal->rlim[resource]; 1469 x = current->signal->rlim[resource];
1470 task_unlock(current->group_leader); 1470 task_unlock(current->group_leader);
1471 if (x.rlim_cur > 0x7FFFFFFF) 1471 if (x.rlim_cur > 0x7FFFFFFF)
1472 x.rlim_cur = 0x7FFFFFFF; 1472 x.rlim_cur = 0x7FFFFFFF;
1473 if (x.rlim_max > 0x7FFFFFFF) 1473 if (x.rlim_max > 0x7FFFFFFF)
1474 x.rlim_max = 0x7FFFFFFF; 1474 x.rlim_max = 0x7FFFFFFF;
1475 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1475 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1476 } 1476 }
1477 1477
1478 #endif 1478 #endif
1479 1479
1480 static inline bool rlim64_is_infinity(__u64 rlim64) 1480 static inline bool rlim64_is_infinity(__u64 rlim64)
1481 { 1481 {
1482 #if BITS_PER_LONG < 64 1482 #if BITS_PER_LONG < 64
1483 return rlim64 >= ULONG_MAX; 1483 return rlim64 >= ULONG_MAX;
1484 #else 1484 #else
1485 return rlim64 == RLIM64_INFINITY; 1485 return rlim64 == RLIM64_INFINITY;
1486 #endif 1486 #endif
1487 } 1487 }
1488 1488
1489 static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) 1489 static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
1490 { 1490 {
1491 if (rlim->rlim_cur == RLIM_INFINITY) 1491 if (rlim->rlim_cur == RLIM_INFINITY)
1492 rlim64->rlim_cur = RLIM64_INFINITY; 1492 rlim64->rlim_cur = RLIM64_INFINITY;
1493 else 1493 else
1494 rlim64->rlim_cur = rlim->rlim_cur; 1494 rlim64->rlim_cur = rlim->rlim_cur;
1495 if (rlim->rlim_max == RLIM_INFINITY) 1495 if (rlim->rlim_max == RLIM_INFINITY)
1496 rlim64->rlim_max = RLIM64_INFINITY; 1496 rlim64->rlim_max = RLIM64_INFINITY;
1497 else 1497 else
1498 rlim64->rlim_max = rlim->rlim_max; 1498 rlim64->rlim_max = rlim->rlim_max;
1499 } 1499 }
1500 1500
1501 static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) 1501 static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
1502 { 1502 {
1503 if (rlim64_is_infinity(rlim64->rlim_cur)) 1503 if (rlim64_is_infinity(rlim64->rlim_cur))
1504 rlim->rlim_cur = RLIM_INFINITY; 1504 rlim->rlim_cur = RLIM_INFINITY;
1505 else 1505 else
1506 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; 1506 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
1507 if (rlim64_is_infinity(rlim64->rlim_max)) 1507 if (rlim64_is_infinity(rlim64->rlim_max))
1508 rlim->rlim_max = RLIM_INFINITY; 1508 rlim->rlim_max = RLIM_INFINITY;
1509 else 1509 else
1510 rlim->rlim_max = (unsigned long)rlim64->rlim_max; 1510 rlim->rlim_max = (unsigned long)rlim64->rlim_max;
1511 } 1511 }
1512 1512
1513 /* make sure you are allowed to change @tsk limits before calling this */ 1513 /* make sure you are allowed to change @tsk limits before calling this */
1514 int do_prlimit(struct task_struct *tsk, unsigned int resource, 1514 int do_prlimit(struct task_struct *tsk, unsigned int resource,
1515 struct rlimit *new_rlim, struct rlimit *old_rlim) 1515 struct rlimit *new_rlim, struct rlimit *old_rlim)
1516 { 1516 {
1517 struct rlimit *rlim; 1517 struct rlimit *rlim;
1518 int retval = 0; 1518 int retval = 0;
1519 1519
1520 if (resource >= RLIM_NLIMITS) 1520 if (resource >= RLIM_NLIMITS)
1521 return -EINVAL; 1521 return -EINVAL;
1522 if (new_rlim) { 1522 if (new_rlim) {
1523 if (new_rlim->rlim_cur > new_rlim->rlim_max) 1523 if (new_rlim->rlim_cur > new_rlim->rlim_max)
1524 return -EINVAL; 1524 return -EINVAL;
1525 if (resource == RLIMIT_NOFILE && 1525 if (resource == RLIMIT_NOFILE &&
1526 new_rlim->rlim_max > sysctl_nr_open) 1526 new_rlim->rlim_max > sysctl_nr_open)
1527 return -EPERM; 1527 return -EPERM;
1528 } 1528 }
1529 1529
1530 /* protect tsk->signal and tsk->sighand from disappearing */ 1530 /* protect tsk->signal and tsk->sighand from disappearing */
1531 read_lock(&tasklist_lock); 1531 read_lock(&tasklist_lock);
1532 if (!tsk->sighand) { 1532 if (!tsk->sighand) {
1533 retval = -ESRCH; 1533 retval = -ESRCH;
1534 goto out; 1534 goto out;
1535 } 1535 }
1536 1536
1537 rlim = tsk->signal->rlim + resource; 1537 rlim = tsk->signal->rlim + resource;
1538 task_lock(tsk->group_leader); 1538 task_lock(tsk->group_leader);
1539 if (new_rlim) { 1539 if (new_rlim) {
1540 /* Keep the capable check against init_user_ns until 1540 /* Keep the capable check against init_user_ns until
1541 cgroups can contain all limits */ 1541 cgroups can contain all limits */
1542 if (new_rlim->rlim_max > rlim->rlim_max && 1542 if (new_rlim->rlim_max > rlim->rlim_max &&
1543 !capable(CAP_SYS_RESOURCE)) 1543 !capable(CAP_SYS_RESOURCE))
1544 retval = -EPERM; 1544 retval = -EPERM;
1545 if (!retval) 1545 if (!retval)
1546 retval = security_task_setrlimit(tsk->group_leader, 1546 retval = security_task_setrlimit(tsk->group_leader,
1547 resource, new_rlim); 1547 resource, new_rlim);
1548 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { 1548 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
1549 /* 1549 /*
1550 * The caller is asking for an immediate RLIMIT_CPU 1550 * The caller is asking for an immediate RLIMIT_CPU
1551 * expiry. But we use the zero value to mean "it was 1551 * expiry. But we use the zero value to mean "it was
1552 * never set". So let's cheat and make it one second 1552 * never set". So let's cheat and make it one second
1553 * instead 1553 * instead
1554 */ 1554 */
1555 new_rlim->rlim_cur = 1; 1555 new_rlim->rlim_cur = 1;
1556 } 1556 }
1557 } 1557 }
1558 if (!retval) { 1558 if (!retval) {
1559 if (old_rlim) 1559 if (old_rlim)
1560 *old_rlim = *rlim; 1560 *old_rlim = *rlim;
1561 if (new_rlim) 1561 if (new_rlim)
1562 *rlim = *new_rlim; 1562 *rlim = *new_rlim;
1563 } 1563 }
1564 task_unlock(tsk->group_leader); 1564 task_unlock(tsk->group_leader);
1565 1565
1566 /* 1566 /*
1567 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1567 * RLIMIT_CPU handling. Note that the kernel fails to return an error
1568 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a 1568 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
1569 * very long-standing error, and fixing it now risks breakage of 1569 * very long-standing error, and fixing it now risks breakage of
1570 * applications, so we live with it 1570 * applications, so we live with it
1571 */ 1571 */
1572 if (!retval && new_rlim && resource == RLIMIT_CPU && 1572 if (!retval && new_rlim && resource == RLIMIT_CPU &&
1573 new_rlim->rlim_cur != RLIM_INFINITY) 1573 new_rlim->rlim_cur != RLIM_INFINITY)
1574 update_rlimit_cpu(tsk, new_rlim->rlim_cur); 1574 update_rlimit_cpu(tsk, new_rlim->rlim_cur);
1575 out: 1575 out:
1576 read_unlock(&tasklist_lock); 1576 read_unlock(&tasklist_lock);
1577 return retval; 1577 return retval;
1578 } 1578 }
1579 1579
1580 /* rcu lock must be held */ 1580 /* rcu lock must be held */
1581 static int check_prlimit_permission(struct task_struct *task) 1581 static int check_prlimit_permission(struct task_struct *task)
1582 { 1582 {
1583 const struct cred *cred = current_cred(), *tcred; 1583 const struct cred *cred = current_cred(), *tcred;
1584 1584
1585 if (current == task) 1585 if (current == task)
1586 return 0; 1586 return 0;
1587 1587
1588 tcred = __task_cred(task); 1588 tcred = __task_cred(task);
1589 if (uid_eq(cred->uid, tcred->euid) && 1589 if (uid_eq(cred->uid, tcred->euid) &&
1590 uid_eq(cred->uid, tcred->suid) && 1590 uid_eq(cred->uid, tcred->suid) &&
1591 uid_eq(cred->uid, tcred->uid) && 1591 uid_eq(cred->uid, tcred->uid) &&
1592 gid_eq(cred->gid, tcred->egid) && 1592 gid_eq(cred->gid, tcred->egid) &&
1593 gid_eq(cred->gid, tcred->sgid) && 1593 gid_eq(cred->gid, tcred->sgid) &&
1594 gid_eq(cred->gid, tcred->gid)) 1594 gid_eq(cred->gid, tcred->gid))
1595 return 0; 1595 return 0;
1596 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) 1596 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
1597 return 0; 1597 return 0;
1598 1598
1599 return -EPERM; 1599 return -EPERM;
1600 } 1600 }
1601 1601
1602 SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1602 SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
1603 const struct rlimit64 __user *, new_rlim, 1603 const struct rlimit64 __user *, new_rlim,
1604 struct rlimit64 __user *, old_rlim) 1604 struct rlimit64 __user *, old_rlim)
1605 { 1605 {
1606 struct rlimit64 old64, new64; 1606 struct rlimit64 old64, new64;
1607 struct rlimit old, new; 1607 struct rlimit old, new;
1608 struct task_struct *tsk; 1608 struct task_struct *tsk;
1609 int ret; 1609 int ret;
1610 1610
1611 if (new_rlim) { 1611 if (new_rlim) {
1612 if (copy_from_user(&new64, new_rlim, sizeof(new64))) 1612 if (copy_from_user(&new64, new_rlim, sizeof(new64)))
1613 return -EFAULT; 1613 return -EFAULT;
1614 rlim64_to_rlim(&new64, &new); 1614 rlim64_to_rlim(&new64, &new);
1615 } 1615 }
1616 1616
1617 rcu_read_lock(); 1617 rcu_read_lock();
1618 tsk = pid ? find_task_by_vpid(pid) : current; 1618 tsk = pid ? find_task_by_vpid(pid) : current;
1619 if (!tsk) { 1619 if (!tsk) {
1620 rcu_read_unlock(); 1620 rcu_read_unlock();
1621 return -ESRCH; 1621 return -ESRCH;
1622 } 1622 }
1623 ret = check_prlimit_permission(tsk); 1623 ret = check_prlimit_permission(tsk);
1624 if (ret) { 1624 if (ret) {
1625 rcu_read_unlock(); 1625 rcu_read_unlock();
1626 return ret; 1626 return ret;
1627 } 1627 }
1628 get_task_struct(tsk); 1628 get_task_struct(tsk);
1629 rcu_read_unlock(); 1629 rcu_read_unlock();
1630 1630
1631 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, 1631 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
1632 old_rlim ? &old : NULL); 1632 old_rlim ? &old : NULL);
1633 1633
1634 if (!ret && old_rlim) { 1634 if (!ret && old_rlim) {
1635 rlim_to_rlim64(&old, &old64); 1635 rlim_to_rlim64(&old, &old64);
1636 if (copy_to_user(old_rlim, &old64, sizeof(old64))) 1636 if (copy_to_user(old_rlim, &old64, sizeof(old64)))
1637 ret = -EFAULT; 1637 ret = -EFAULT;
1638 } 1638 }
1639 1639
1640 put_task_struct(tsk); 1640 put_task_struct(tsk);
1641 return ret; 1641 return ret;
1642 } 1642 }
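
A userspace sketch of driving the prlimit64 syscall above through the glibc prlimit() wrapper: one call both installs a new RLIMIT_NOFILE and returns the previous one for another process. The pid 1234 is an arbitrary example; the caller needs matching credentials or CAP_SYS_RESOURCE, as enforced by check_prlimit_permission().

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/resource.h>
#include <sys/types.h>

int main(void)
{
    pid_t pid = 1234;                       /* example target process */
    struct rlimit new_lim = { .rlim_cur = 4096, .rlim_max = 4096 };
    struct rlimit old_lim;

    if (prlimit(pid, RLIMIT_NOFILE, &new_lim, &old_lim) != 0) {
        perror("prlimit");
        return 1;
    }
    printf("old soft=%llu hard=%llu\n",
           (unsigned long long)old_lim.rlim_cur,
           (unsigned long long)old_lim.rlim_max);
    return 0;
}
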
1643 1643
1644 SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1644 SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1645 { 1645 {
1646 struct rlimit new_rlim; 1646 struct rlimit new_rlim;
1647 1647
1648 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1648 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1649 return -EFAULT; 1649 return -EFAULT;
1650 return do_prlimit(current, resource, &new_rlim, NULL); 1650 return do_prlimit(current, resource, &new_rlim, NULL);
1651 } 1651 }
1652 1652
1653 /* 1653 /*
1654 * It would make sense to put struct rusage in the task_struct, 1654 * It would make sense to put struct rusage in the task_struct,
1655 * except that would make the task_struct be *really big*. After 1655 * except that would make the task_struct be *really big*. After
1656 * task_struct gets moved into malloc'ed memory, it would 1656 * task_struct gets moved into malloc'ed memory, it would
1657 * make sense to do this. It will make moving the rest of the information 1657 * make sense to do this. It will make moving the rest of the information
1658 * a lot simpler! (Which we're not doing right now because we're not 1658 * a lot simpler! (Which we're not doing right now because we're not
1659 * measuring them yet). 1659 * measuring them yet).
1660 * 1660 *
1661 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1661 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1662 * races with threads incrementing their own counters. But since word 1662 * races with threads incrementing their own counters. But since word
1663 * reads are atomic, we either get new values or old values and we don't 1663 * reads are atomic, we either get new values or old values and we don't
1664 * care which for the sums. We always take the siglock to protect reading 1664 * care which for the sums. We always take the siglock to protect reading
1665 * the c* fields from p->signal from races with exit.c updating those 1665 * the c* fields from p->signal from races with exit.c updating those
1666 * fields when reaping, so a sample either gets all the additions of a 1666 * fields when reaping, so a sample either gets all the additions of a
1667 * given child after it's reaped, or none so this sample is before reaping. 1667 * given child after it's reaped, or none so this sample is before reaping.
1668 * 1668 *
1669 * Locking: 1669 * Locking:
1670 * We need to take the siglock for CHILDREN, SELF and BOTH 1670 * We need to take the siglock for CHILDREN, SELF and BOTH
1671 * for the cases current multithreaded, non-current single threaded 1671 * for the cases current multithreaded, non-current single threaded
1672 * non-current multithreaded. Thread traversal is now safe with 1672 * non-current multithreaded. Thread traversal is now safe with
1673 * the siglock held. 1673 * the siglock held.
1674 * Strictly speaking, we do not need to take the siglock if we are current and 1674 * Strictly speaking, we do not need to take the siglock if we are current and
1675 * single threaded, as no one else can take our signal_struct away, no one 1675 * single threaded, as no one else can take our signal_struct away, no one
1676 * else can reap the children to update signal->c* counters, and no one else 1676 * else can reap the children to update signal->c* counters, and no one else
1677 * can race with the signal-> fields. If we do not take any lock, the 1677 * can race with the signal-> fields. If we do not take any lock, the
1678 * signal-> fields could be read out of order while another thread was just 1678 * signal-> fields could be read out of order while another thread was just
1679 * exiting. So we should place a read memory barrier when we avoid the lock. 1679 * exiting. So we should place a read memory barrier when we avoid the lock.
1680 * On the writer side, write memory barrier is implied in __exit_signal 1680 * On the writer side, write memory barrier is implied in __exit_signal
1681 * as __exit_signal releases the siglock spinlock after updating the signal-> 1681 * as __exit_signal releases the siglock spinlock after updating the signal->
1682 * fields. But we don't do this yet to keep things simple. 1682 * fields. But we don't do this yet to keep things simple.
1683 * 1683 *
1684 */ 1684 */
1685 1685
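The locking rules above cover the three who values userspace can pass; a sketch exercising all of them (RUSAGE_THREAD needs _GNU_SOURCE with glibc, and the printed fields are just examples):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/resource.h>

static void show(int who, const char *name)
{
	struct rusage ru;

	if (getrusage(who, &ru) != 0) {
		perror(name);
		return;
	}
	printf("%-16s utime=%ld.%06lds minflt=%ld\n", name,
	       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
	       ru.ru_minflt);
}

int main(void)
{
	show(RUSAGE_SELF, "RUSAGE_SELF");		/* whole thread group */
	show(RUSAGE_CHILDREN, "RUSAGE_CHILDREN");	/* reaped children only */
	show(RUSAGE_THREAD, "RUSAGE_THREAD");		/* calling thread only */
	return 0;
}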
1686 static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) 1686 static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
1687 { 1687 {
1688 r->ru_nvcsw += t->nvcsw; 1688 r->ru_nvcsw += t->nvcsw;
1689 r->ru_nivcsw += t->nivcsw; 1689 r->ru_nivcsw += t->nivcsw;
1690 r->ru_minflt += t->min_flt; 1690 r->ru_minflt += t->min_flt;
1691 r->ru_majflt += t->maj_flt; 1691 r->ru_majflt += t->maj_flt;
1692 r->ru_inblock += task_io_get_inblock(t); 1692 r->ru_inblock += task_io_get_inblock(t);
1693 r->ru_oublock += task_io_get_oublock(t); 1693 r->ru_oublock += task_io_get_oublock(t);
1694 } 1694 }
1695 1695
1696 static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 1696 static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1697 { 1697 {
1698 struct task_struct *t; 1698 struct task_struct *t;
1699 unsigned long flags; 1699 unsigned long flags;
1700 cputime_t tgutime, tgstime, utime, stime; 1700 cputime_t tgutime, tgstime, utime, stime;
1701 unsigned long maxrss = 0; 1701 unsigned long maxrss = 0;
1702 1702
1703 memset((char *) r, 0, sizeof *r); 1703 memset((char *) r, 0, sizeof *r);
1704 utime = stime = 0; 1704 utime = stime = 0;
1705 1705
1706 if (who == RUSAGE_THREAD) { 1706 if (who == RUSAGE_THREAD) {
1707 task_times(current, &utime, &stime); 1707 task_cputime_adjusted(current, &utime, &stime);
1708 accumulate_thread_rusage(p, r); 1708 accumulate_thread_rusage(p, r);
1709 maxrss = p->signal->maxrss; 1709 maxrss = p->signal->maxrss;
1710 goto out; 1710 goto out;
1711 } 1711 }
1712 1712
1713 if (!lock_task_sighand(p, &flags)) 1713 if (!lock_task_sighand(p, &flags))
1714 return; 1714 return;
1715 1715
1716 switch (who) { 1716 switch (who) {
1717 case RUSAGE_BOTH: 1717 case RUSAGE_BOTH:
1718 case RUSAGE_CHILDREN: 1718 case RUSAGE_CHILDREN:
1719 utime = p->signal->cutime; 1719 utime = p->signal->cutime;
1720 stime = p->signal->cstime; 1720 stime = p->signal->cstime;
1721 r->ru_nvcsw = p->signal->cnvcsw; 1721 r->ru_nvcsw = p->signal->cnvcsw;
1722 r->ru_nivcsw = p->signal->cnivcsw; 1722 r->ru_nivcsw = p->signal->cnivcsw;
1723 r->ru_minflt = p->signal->cmin_flt; 1723 r->ru_minflt = p->signal->cmin_flt;
1724 r->ru_majflt = p->signal->cmaj_flt; 1724 r->ru_majflt = p->signal->cmaj_flt;
1725 r->ru_inblock = p->signal->cinblock; 1725 r->ru_inblock = p->signal->cinblock;
1726 r->ru_oublock = p->signal->coublock; 1726 r->ru_oublock = p->signal->coublock;
1727 maxrss = p->signal->cmaxrss; 1727 maxrss = p->signal->cmaxrss;
1728 1728
1729 if (who == RUSAGE_CHILDREN) 1729 if (who == RUSAGE_CHILDREN)
1730 break; 1730 break;
1731 1731
1732 case RUSAGE_SELF: 1732 case RUSAGE_SELF:
1733 thread_group_times(p, &tgutime, &tgstime); 1733 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1734 utime += tgutime; 1734 utime += tgutime;
1735 stime += tgstime; 1735 stime += tgstime;
1736 r->ru_nvcsw += p->signal->nvcsw; 1736 r->ru_nvcsw += p->signal->nvcsw;
1737 r->ru_nivcsw += p->signal->nivcsw; 1737 r->ru_nivcsw += p->signal->nivcsw;
1738 r->ru_minflt += p->signal->min_flt; 1738 r->ru_minflt += p->signal->min_flt;
1739 r->ru_majflt += p->signal->maj_flt; 1739 r->ru_majflt += p->signal->maj_flt;
1740 r->ru_inblock += p->signal->inblock; 1740 r->ru_inblock += p->signal->inblock;
1741 r->ru_oublock += p->signal->oublock; 1741 r->ru_oublock += p->signal->oublock;
1742 if (maxrss < p->signal->maxrss) 1742 if (maxrss < p->signal->maxrss)
1743 maxrss = p->signal->maxrss; 1743 maxrss = p->signal->maxrss;
1744 t = p; 1744 t = p;
1745 do { 1745 do {
1746 accumulate_thread_rusage(t, r); 1746 accumulate_thread_rusage(t, r);
1747 t = next_thread(t); 1747 t = next_thread(t);
1748 } while (t != p); 1748 } while (t != p);
1749 break; 1749 break;
1750 1750
1751 default: 1751 default:
1752 BUG(); 1752 BUG();
1753 } 1753 }
1754 unlock_task_sighand(p, &flags); 1754 unlock_task_sighand(p, &flags);
1755 1755
1756 out: 1756 out:
1757 cputime_to_timeval(utime, &r->ru_utime); 1757 cputime_to_timeval(utime, &r->ru_utime);
1758 cputime_to_timeval(stime, &r->ru_stime); 1758 cputime_to_timeval(stime, &r->ru_stime);
1759 1759
1760 if (who != RUSAGE_CHILDREN) { 1760 if (who != RUSAGE_CHILDREN) {
1761 struct mm_struct *mm = get_task_mm(p); 1761 struct mm_struct *mm = get_task_mm(p);
1762 if (mm) { 1762 if (mm) {
1763 setmax_mm_hiwater_rss(&maxrss, mm); 1763 setmax_mm_hiwater_rss(&maxrss, mm);
1764 mmput(mm); 1764 mmput(mm);
1765 } 1765 }
1766 } 1766 }
1767 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ 1767 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
1768 } 1768 }
1769 1769
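As k_getrusage() shows, ru_utime/ru_stime are the task_cputime_adjusted()/thread_group_cputime_adjusted() values converted by cputime_to_timeval(), and ru_maxrss is always reported in kilobytes (maxrss pages times PAGE_SIZE/1024). A quick userspace check, assuming a 4 MiB allocation is enough to move the high-water mark:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>

int main(void)
{
	size_t sz = 4 * 1024 * 1024;
	char *buf = malloc(sz);
	struct rusage ru;

	if (buf)
		memset(buf, 1, sz);	/* touch the pages so they count toward RSS */

	if (getrusage(RUSAGE_SELF, &ru) != 0) {
		perror("getrusage");
		return 1;
	}
	printf("cpu: %ld.%06lds user, peak rss: %ld kB\n",
	       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
	       ru.ru_maxrss);
	free(buf);
	return 0;
}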
1770 int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 1770 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1771 { 1771 {
1772 struct rusage r; 1772 struct rusage r;
1773 k_getrusage(p, who, &r); 1773 k_getrusage(p, who, &r);
1774 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 1774 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1775 } 1775 }
1776 1776
1777 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) 1777 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1778 { 1778 {
1779 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1779 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1780 who != RUSAGE_THREAD) 1780 who != RUSAGE_THREAD)
1781 return -EINVAL; 1781 return -EINVAL;
1782 return getrusage(current, who, ru); 1782 return getrusage(current, who, ru);
1783 } 1783 }
1784 1784
1785 SYSCALL_DEFINE1(umask, int, mask) 1785 SYSCALL_DEFINE1(umask, int, mask)
1786 { 1786 {
1787 mask = xchg(&current->fs->umask, mask & S_IRWXUGO); 1787 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1788 return mask; 1788 return mask;
1789 } 1789 }
1790 1790
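The umask() syscall above just exchanges the new mask for the old one, which is what the usual save-and-restore idiom relies on:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	/* umask() cannot fail; it always returns the previous mask. */
	mode_t old = umask(S_IWGRP | S_IWOTH);		/* i.e. 022 */

	printf("previous umask: %03o\n", (unsigned int)old);
	umask(old);					/* restore it */
	return 0;
}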
1791 #ifdef CONFIG_CHECKPOINT_RESTORE 1791 #ifdef CONFIG_CHECKPOINT_RESTORE
1792 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1792 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1793 { 1793 {
1794 struct fd exe; 1794 struct fd exe;
1795 struct dentry *dentry; 1795 struct dentry *dentry;
1796 int err; 1796 int err;
1797 1797
1798 exe = fdget(fd); 1798 exe = fdget(fd);
1799 if (!exe.file) 1799 if (!exe.file)
1800 return -EBADF; 1800 return -EBADF;
1801 1801
1802 dentry = exe.file->f_path.dentry; 1802 dentry = exe.file->f_path.dentry;
1803 1803
1804 /* 1804 /*
1805 * Because the original mm->exe_file points to executable file, make 1805 * Because the original mm->exe_file points to executable file, make
1806 * sure that this one is executable as well, to avoid breaking an 1806 * sure that this one is executable as well, to avoid breaking an
1807 * overall picture. 1807 * overall picture.
1808 */ 1808 */
1809 err = -EACCES; 1809 err = -EACCES;
1810 if (!S_ISREG(dentry->d_inode->i_mode) || 1810 if (!S_ISREG(dentry->d_inode->i_mode) ||
1811 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1811 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
1812 goto exit; 1812 goto exit;
1813 1813
1814 err = inode_permission(dentry->d_inode, MAY_EXEC); 1814 err = inode_permission(dentry->d_inode, MAY_EXEC);
1815 if (err) 1815 if (err)
1816 goto exit; 1816 goto exit;
1817 1817
1818 down_write(&mm->mmap_sem); 1818 down_write(&mm->mmap_sem);
1819 1819
1820 /* 1820 /*
1821 * Forbid mm->exe_file change if old file still mapped. 1821 * Forbid mm->exe_file change if old file still mapped.
1822 */ 1822 */
1823 err = -EBUSY; 1823 err = -EBUSY;
1824 if (mm->exe_file) { 1824 if (mm->exe_file) {
1825 struct vm_area_struct *vma; 1825 struct vm_area_struct *vma;
1826 1826
1827 for (vma = mm->mmap; vma; vma = vma->vm_next) 1827 for (vma = mm->mmap; vma; vma = vma->vm_next)
1828 if (vma->vm_file && 1828 if (vma->vm_file &&
1829 path_equal(&vma->vm_file->f_path, 1829 path_equal(&vma->vm_file->f_path,
1830 &mm->exe_file->f_path)) 1830 &mm->exe_file->f_path))
1831 goto exit_unlock; 1831 goto exit_unlock;
1832 } 1832 }
1833 1833
1834 /* 1834 /*
1835 * The symlink can be changed only once, just to disallow arbitrary 1835 * The symlink can be changed only once, just to disallow arbitrary
1836 * transitions malicious software might bring in. This means one 1836 * transitions malicious software might bring in. This means one
1837 * could make a snapshot over all processes running and monitor 1837 * could make a snapshot over all processes running and monitor
1838 * /proc/pid/exe changes to notice unusual activity if needed. 1838 * /proc/pid/exe changes to notice unusual activity if needed.
1839 */ 1839 */
1840 err = -EPERM; 1840 err = -EPERM;
1841 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) 1841 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
1842 goto exit_unlock; 1842 goto exit_unlock;
1843 1843
1844 err = 0; 1844 err = 0;
1845 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ 1845 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */
1846 exit_unlock: 1846 exit_unlock:
1847 up_write(&mm->mmap_sem); 1847 up_write(&mm->mmap_sem);
1848 1848
1849 exit: 1849 exit:
1850 fdput(exe); 1850 fdput(exe);
1851 return err; 1851 return err;
1852 } 1852 }
1853 1853
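A hedged sketch of how a checkpoint/restore tool might reach prctl_set_mm_exe_file() from userspace; the binary path is illustrative, CAP_SYS_RESOURCE is required, and the call fails with -EBUSY while the old exe file is still mapped, as enforced above:

#include <fcntl.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

int main(void)
{
	/* Must be a regular file on a mount without MNT_NOEXEC. */
	int fd = open("/usr/bin/true", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* arg3 carries the fd, matching prctl_set_mm_exe_file(mm, (unsigned int)addr). */
	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, (unsigned long)fd, 0, 0) != 0)
		perror("PR_SET_MM_EXE_FILE");	/* typically -EBUSY or -EPERM here */
	close(fd);
	return 0;
}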
1854 static int prctl_set_mm(int opt, unsigned long addr, 1854 static int prctl_set_mm(int opt, unsigned long addr,
1855 unsigned long arg4, unsigned long arg5) 1855 unsigned long arg4, unsigned long arg5)
1856 { 1856 {
1857 unsigned long rlim = rlimit(RLIMIT_DATA); 1857 unsigned long rlim = rlimit(RLIMIT_DATA);
1858 struct mm_struct *mm = current->mm; 1858 struct mm_struct *mm = current->mm;
1859 struct vm_area_struct *vma; 1859 struct vm_area_struct *vma;
1860 int error; 1860 int error;
1861 1861
1862 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) 1862 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
1863 return -EINVAL; 1863 return -EINVAL;
1864 1864
1865 if (!capable(CAP_SYS_RESOURCE)) 1865 if (!capable(CAP_SYS_RESOURCE))
1866 return -EPERM; 1866 return -EPERM;
1867 1867
1868 if (opt == PR_SET_MM_EXE_FILE) 1868 if (opt == PR_SET_MM_EXE_FILE)
1869 return prctl_set_mm_exe_file(mm, (unsigned int)addr); 1869 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
1870 1870
1871 if (addr >= TASK_SIZE || addr < mmap_min_addr) 1871 if (addr >= TASK_SIZE || addr < mmap_min_addr)
1872 return -EINVAL; 1872 return -EINVAL;
1873 1873
1874 error = -EINVAL; 1874 error = -EINVAL;
1875 1875
1876 down_read(&mm->mmap_sem); 1876 down_read(&mm->mmap_sem);
1877 vma = find_vma(mm, addr); 1877 vma = find_vma(mm, addr);
1878 1878
1879 switch (opt) { 1879 switch (opt) {
1880 case PR_SET_MM_START_CODE: 1880 case PR_SET_MM_START_CODE:
1881 mm->start_code = addr; 1881 mm->start_code = addr;
1882 break; 1882 break;
1883 case PR_SET_MM_END_CODE: 1883 case PR_SET_MM_END_CODE:
1884 mm->end_code = addr; 1884 mm->end_code = addr;
1885 break; 1885 break;
1886 case PR_SET_MM_START_DATA: 1886 case PR_SET_MM_START_DATA:
1887 mm->start_data = addr; 1887 mm->start_data = addr;
1888 break; 1888 break;
1889 case PR_SET_MM_END_DATA: 1889 case PR_SET_MM_END_DATA:
1890 mm->end_data = addr; 1890 mm->end_data = addr;
1891 break; 1891 break;
1892 1892
1893 case PR_SET_MM_START_BRK: 1893 case PR_SET_MM_START_BRK:
1894 if (addr <= mm->end_data) 1894 if (addr <= mm->end_data)
1895 goto out; 1895 goto out;
1896 1896
1897 if (rlim < RLIM_INFINITY && 1897 if (rlim < RLIM_INFINITY &&
1898 (mm->brk - addr) + 1898 (mm->brk - addr) +
1899 (mm->end_data - mm->start_data) > rlim) 1899 (mm->end_data - mm->start_data) > rlim)
1900 goto out; 1900 goto out;
1901 1901
1902 mm->start_brk = addr; 1902 mm->start_brk = addr;
1903 break; 1903 break;
1904 1904
1905 case PR_SET_MM_BRK: 1905 case PR_SET_MM_BRK:
1906 if (addr <= mm->end_data) 1906 if (addr <= mm->end_data)
1907 goto out; 1907 goto out;
1908 1908
1909 if (rlim < RLIM_INFINITY && 1909 if (rlim < RLIM_INFINITY &&
1910 (addr - mm->start_brk) + 1910 (addr - mm->start_brk) +
1911 (mm->end_data - mm->start_data) > rlim) 1911 (mm->end_data - mm->start_data) > rlim)
1912 goto out; 1912 goto out;
1913 1913
1914 mm->brk = addr; 1914 mm->brk = addr;
1915 break; 1915 break;
1916 1916
1917 /* 1917 /*
1918 * If command line arguments and environment 1918 * If command line arguments and environment
1919 * are placed somewhere else on stack, we can 1919 * are placed somewhere else on stack, we can
1920 * set them up here, ARG_START/END to setup 1920 * set them up here, ARG_START/END to setup
1921 * command line arguments and ENV_START/END 1921 * command line arguments and ENV_START/END
1922 * for environment. 1922 * for environment.
1923 */ 1923 */
1924 case PR_SET_MM_START_STACK: 1924 case PR_SET_MM_START_STACK:
1925 case PR_SET_MM_ARG_START: 1925 case PR_SET_MM_ARG_START:
1926 case PR_SET_MM_ARG_END: 1926 case PR_SET_MM_ARG_END:
1927 case PR_SET_MM_ENV_START: 1927 case PR_SET_MM_ENV_START:
1928 case PR_SET_MM_ENV_END: 1928 case PR_SET_MM_ENV_END:
1929 if (!vma) { 1929 if (!vma) {
1930 error = -EFAULT; 1930 error = -EFAULT;
1931 goto out; 1931 goto out;
1932 } 1932 }
1933 if (opt == PR_SET_MM_START_STACK) 1933 if (opt == PR_SET_MM_START_STACK)
1934 mm->start_stack = addr; 1934 mm->start_stack = addr;
1935 else if (opt == PR_SET_MM_ARG_START) 1935 else if (opt == PR_SET_MM_ARG_START)
1936 mm->arg_start = addr; 1936 mm->arg_start = addr;
1937 else if (opt == PR_SET_MM_ARG_END) 1937 else if (opt == PR_SET_MM_ARG_END)
1938 mm->arg_end = addr; 1938 mm->arg_end = addr;
1939 else if (opt == PR_SET_MM_ENV_START) 1939 else if (opt == PR_SET_MM_ENV_START)
1940 mm->env_start = addr; 1940 mm->env_start = addr;
1941 else if (opt == PR_SET_MM_ENV_END) 1941 else if (opt == PR_SET_MM_ENV_END)
1942 mm->env_end = addr; 1942 mm->env_end = addr;
1943 break; 1943 break;
1944 1944
1945 /* 1945 /*
1946 * This doesn't move the auxiliary vector itself 1946 * This doesn't move the auxiliary vector itself
1947 * since it's pinned to mm_struct, but allows 1947 * since it's pinned to mm_struct, but allows
1948 * filling the vector with new values. It's up 1948 * filling the vector with new values. It's up
1949 * to a caller to provide sane values here 1949 * to a caller to provide sane values here
1950 * otherwise user space tools which use this 1950 * otherwise user space tools which use this
1951 * vector might be unhappy. 1951 * vector might be unhappy.
1952 */ 1952 */
1953 case PR_SET_MM_AUXV: { 1953 case PR_SET_MM_AUXV: {
1954 unsigned long user_auxv[AT_VECTOR_SIZE]; 1954 unsigned long user_auxv[AT_VECTOR_SIZE];
1955 1955
1956 if (arg4 > sizeof(user_auxv)) 1956 if (arg4 > sizeof(user_auxv))
1957 goto out; 1957 goto out;
1958 up_read(&mm->mmap_sem); 1958 up_read(&mm->mmap_sem);
1959 1959
1960 if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) 1960 if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
1961 return -EFAULT; 1961 return -EFAULT;
1962 1962
1963 /* Make sure the last entry is always AT_NULL */ 1963 /* Make sure the last entry is always AT_NULL */
1964 user_auxv[AT_VECTOR_SIZE - 2] = 0; 1964 user_auxv[AT_VECTOR_SIZE - 2] = 0;
1965 user_auxv[AT_VECTOR_SIZE - 1] = 0; 1965 user_auxv[AT_VECTOR_SIZE - 1] = 0;
1966 1966
1967 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); 1967 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
1968 1968
1969 task_lock(current); 1969 task_lock(current);
1970 memcpy(mm->saved_auxv, user_auxv, arg4); 1970 memcpy(mm->saved_auxv, user_auxv, arg4);
1971 task_unlock(current); 1971 task_unlock(current);
1972 1972
1973 return 0; 1973 return 0;
1974 } 1974 }
1975 default: 1975 default:
1976 goto out; 1976 goto out;
1977 } 1977 }
1978 1978
1979 error = 0; 1979 error = 0;
1980 out: 1980 out:
1981 up_read(&mm->mmap_sem); 1981 up_read(&mm->mmap_sem);
1982 return error; 1982 return error;
1983 } 1983 }
1984 1984
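The remaining PR_SET_MM options all take an address in arg3, with arg4/arg5 required to be zero except for PR_SET_MM_AUXV; a hedged sketch (CONFIG_CHECKPOINT_RESTORE and CAP_SYS_RESOURCE are assumed, and reusing sbrk(0) is only meant to produce a plausible value):

#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

int main(void)
{
	/* The current program break is at least a sane candidate for PR_SET_MM_BRK. */
	unsigned long brk = (unsigned long)sbrk(0);

	if (prctl(PR_SET_MM, PR_SET_MM_BRK, brk, 0, 0) != 0)
		perror("PR_SET_MM_BRK");	/* -EPERM without CAP_SYS_RESOURCE */
	return 0;
}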
1985 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 1985 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
1986 { 1986 {
1987 return put_user(me->clear_child_tid, tid_addr); 1987 return put_user(me->clear_child_tid, tid_addr);
1988 } 1988 }
1989 1989
1990 #else /* CONFIG_CHECKPOINT_RESTORE */ 1990 #else /* CONFIG_CHECKPOINT_RESTORE */
1991 static int prctl_set_mm(int opt, unsigned long addr, 1991 static int prctl_set_mm(int opt, unsigned long addr,
1992 unsigned long arg4, unsigned long arg5) 1992 unsigned long arg4, unsigned long arg5)
1993 { 1993 {
1994 return -EINVAL; 1994 return -EINVAL;
1995 } 1995 }
1996 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 1996 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
1997 { 1997 {
1998 return -EINVAL; 1998 return -EINVAL;
1999 } 1999 }
2000 #endif 2000 #endif
2001 2001
2002 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 2002 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2003 unsigned long, arg4, unsigned long, arg5) 2003 unsigned long, arg4, unsigned long, arg5)
2004 { 2004 {
2005 struct task_struct *me = current; 2005 struct task_struct *me = current;
2006 unsigned char comm[sizeof(me->comm)]; 2006 unsigned char comm[sizeof(me->comm)];
2007 long error; 2007 long error;
2008 2008
2009 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 2009 error = security_task_prctl(option, arg2, arg3, arg4, arg5);
2010 if (error != -ENOSYS) 2010 if (error != -ENOSYS)
2011 return error; 2011 return error;
2012 2012
2013 error = 0; 2013 error = 0;
2014 switch (option) { 2014 switch (option) {
2015 case PR_SET_PDEATHSIG: 2015 case PR_SET_PDEATHSIG:
2016 if (!valid_signal(arg2)) { 2016 if (!valid_signal(arg2)) {
2017 error = -EINVAL; 2017 error = -EINVAL;
2018 break; 2018 break;
2019 } 2019 }
2020 me->pdeath_signal = arg2; 2020 me->pdeath_signal = arg2;
2021 break; 2021 break;
2022 case PR_GET_PDEATHSIG: 2022 case PR_GET_PDEATHSIG:
2023 error = put_user(me->pdeath_signal, (int __user *)arg2); 2023 error = put_user(me->pdeath_signal, (int __user *)arg2);
2024 break; 2024 break;
2025 case PR_GET_DUMPABLE: 2025 case PR_GET_DUMPABLE:
2026 error = get_dumpable(me->mm); 2026 error = get_dumpable(me->mm);
2027 break; 2027 break;
2028 case PR_SET_DUMPABLE: 2028 case PR_SET_DUMPABLE:
2029 if (arg2 < 0 || arg2 > 1) { 2029 if (arg2 < 0 || arg2 > 1) {
2030 error = -EINVAL; 2030 error = -EINVAL;
2031 break; 2031 break;
2032 } 2032 }
2033 set_dumpable(me->mm, arg2); 2033 set_dumpable(me->mm, arg2);
2034 break; 2034 break;
2035 2035
2036 case PR_SET_UNALIGN: 2036 case PR_SET_UNALIGN:
2037 error = SET_UNALIGN_CTL(me, arg2); 2037 error = SET_UNALIGN_CTL(me, arg2);
2038 break; 2038 break;
2039 case PR_GET_UNALIGN: 2039 case PR_GET_UNALIGN:
2040 error = GET_UNALIGN_CTL(me, arg2); 2040 error = GET_UNALIGN_CTL(me, arg2);
2041 break; 2041 break;
2042 case PR_SET_FPEMU: 2042 case PR_SET_FPEMU:
2043 error = SET_FPEMU_CTL(me, arg2); 2043 error = SET_FPEMU_CTL(me, arg2);
2044 break; 2044 break;
2045 case PR_GET_FPEMU: 2045 case PR_GET_FPEMU:
2046 error = GET_FPEMU_CTL(me, arg2); 2046 error = GET_FPEMU_CTL(me, arg2);
2047 break; 2047 break;
2048 case PR_SET_FPEXC: 2048 case PR_SET_FPEXC:
2049 error = SET_FPEXC_CTL(me, arg2); 2049 error = SET_FPEXC_CTL(me, arg2);
2050 break; 2050 break;
2051 case PR_GET_FPEXC: 2051 case PR_GET_FPEXC:
2052 error = GET_FPEXC_CTL(me, arg2); 2052 error = GET_FPEXC_CTL(me, arg2);
2053 break; 2053 break;
2054 case PR_GET_TIMING: 2054 case PR_GET_TIMING:
2055 error = PR_TIMING_STATISTICAL; 2055 error = PR_TIMING_STATISTICAL;
2056 break; 2056 break;
2057 case PR_SET_TIMING: 2057 case PR_SET_TIMING:
2058 if (arg2 != PR_TIMING_STATISTICAL) 2058 if (arg2 != PR_TIMING_STATISTICAL)
2059 error = -EINVAL; 2059 error = -EINVAL;
2060 break; 2060 break;
2061 case PR_SET_NAME: 2061 case PR_SET_NAME:
2062 comm[sizeof(me->comm)-1] = 0; 2062 comm[sizeof(me->comm)-1] = 0;
2063 if (strncpy_from_user(comm, (char __user *)arg2, 2063 if (strncpy_from_user(comm, (char __user *)arg2,
2064 sizeof(me->comm) - 1) < 0) 2064 sizeof(me->comm) - 1) < 0)
2065 return -EFAULT; 2065 return -EFAULT;
2066 set_task_comm(me, comm); 2066 set_task_comm(me, comm);
2067 proc_comm_connector(me); 2067 proc_comm_connector(me);
2068 break; 2068 break;
2069 case PR_GET_NAME: 2069 case PR_GET_NAME:
2070 get_task_comm(comm, me); 2070 get_task_comm(comm, me);
2071 if (copy_to_user((char __user *)arg2, comm, 2071 if (copy_to_user((char __user *)arg2, comm,
2072 sizeof(comm))) 2072 sizeof(comm)))
2073 return -EFAULT; 2073 return -EFAULT;
2074 break; 2074 break;
2075 case PR_GET_ENDIAN: 2075 case PR_GET_ENDIAN:
2076 error = GET_ENDIAN(me, arg2); 2076 error = GET_ENDIAN(me, arg2);
2077 break; 2077 break;
2078 case PR_SET_ENDIAN: 2078 case PR_SET_ENDIAN:
2079 error = SET_ENDIAN(me, arg2); 2079 error = SET_ENDIAN(me, arg2);
2080 break; 2080 break;
2081 case PR_GET_SECCOMP: 2081 case PR_GET_SECCOMP:
2082 error = prctl_get_seccomp(); 2082 error = prctl_get_seccomp();
2083 break; 2083 break;
2084 case PR_SET_SECCOMP: 2084 case PR_SET_SECCOMP:
2085 error = prctl_set_seccomp(arg2, (char __user *)arg3); 2085 error = prctl_set_seccomp(arg2, (char __user *)arg3);
2086 break; 2086 break;
2087 case PR_GET_TSC: 2087 case PR_GET_TSC:
2088 error = GET_TSC_CTL(arg2); 2088 error = GET_TSC_CTL(arg2);
2089 break; 2089 break;
2090 case PR_SET_TSC: 2090 case PR_SET_TSC:
2091 error = SET_TSC_CTL(arg2); 2091 error = SET_TSC_CTL(arg2);
2092 break; 2092 break;
2093 case PR_TASK_PERF_EVENTS_DISABLE: 2093 case PR_TASK_PERF_EVENTS_DISABLE:
2094 error = perf_event_task_disable(); 2094 error = perf_event_task_disable();
2095 break; 2095 break;
2096 case PR_TASK_PERF_EVENTS_ENABLE: 2096 case PR_TASK_PERF_EVENTS_ENABLE:
2097 error = perf_event_task_enable(); 2097 error = perf_event_task_enable();
2098 break; 2098 break;
2099 case PR_GET_TIMERSLACK: 2099 case PR_GET_TIMERSLACK:
2100 error = current->timer_slack_ns; 2100 error = current->timer_slack_ns;
2101 break; 2101 break;
2102 case PR_SET_TIMERSLACK: 2102 case PR_SET_TIMERSLACK:
2103 if (arg2 <= 0) 2103 if (arg2 <= 0)
2104 current->timer_slack_ns = 2104 current->timer_slack_ns =
2105 current->default_timer_slack_ns; 2105 current->default_timer_slack_ns;
2106 else 2106 else
2107 current->timer_slack_ns = arg2; 2107 current->timer_slack_ns = arg2;
2108 break; 2108 break;
2109 case PR_MCE_KILL: 2109 case PR_MCE_KILL:
2110 if (arg4 | arg5) 2110 if (arg4 | arg5)
2111 return -EINVAL; 2111 return -EINVAL;
2112 switch (arg2) { 2112 switch (arg2) {
2113 case PR_MCE_KILL_CLEAR: 2113 case PR_MCE_KILL_CLEAR:
2114 if (arg3 != 0) 2114 if (arg3 != 0)
2115 return -EINVAL; 2115 return -EINVAL;
2116 current->flags &= ~PF_MCE_PROCESS; 2116 current->flags &= ~PF_MCE_PROCESS;
2117 break; 2117 break;
2118 case PR_MCE_KILL_SET: 2118 case PR_MCE_KILL_SET:
2119 current->flags |= PF_MCE_PROCESS; 2119 current->flags |= PF_MCE_PROCESS;
2120 if (arg3 == PR_MCE_KILL_EARLY) 2120 if (arg3 == PR_MCE_KILL_EARLY)
2121 current->flags |= PF_MCE_EARLY; 2121 current->flags |= PF_MCE_EARLY;
2122 else if (arg3 == PR_MCE_KILL_LATE) 2122 else if (arg3 == PR_MCE_KILL_LATE)
2123 current->flags &= ~PF_MCE_EARLY; 2123 current->flags &= ~PF_MCE_EARLY;
2124 else if (arg3 == PR_MCE_KILL_DEFAULT) 2124 else if (arg3 == PR_MCE_KILL_DEFAULT)
2125 current->flags &= 2125 current->flags &=
2126 ~(PF_MCE_EARLY|PF_MCE_PROCESS); 2126 ~(PF_MCE_EARLY|PF_MCE_PROCESS);
2127 else 2127 else
2128 return -EINVAL; 2128 return -EINVAL;
2129 break; 2129 break;
2130 default: 2130 default:
2131 return -EINVAL; 2131 return -EINVAL;
2132 } 2132 }
2133 break; 2133 break;
2134 case PR_MCE_KILL_GET: 2134 case PR_MCE_KILL_GET:
2135 if (arg2 | arg3 | arg4 | arg5) 2135 if (arg2 | arg3 | arg4 | arg5)
2136 return -EINVAL; 2136 return -EINVAL;
2137 if (current->flags & PF_MCE_PROCESS) 2137 if (current->flags & PF_MCE_PROCESS)
2138 error = (current->flags & PF_MCE_EARLY) ? 2138 error = (current->flags & PF_MCE_EARLY) ?
2139 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; 2139 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
2140 else 2140 else
2141 error = PR_MCE_KILL_DEFAULT; 2141 error = PR_MCE_KILL_DEFAULT;
2142 break; 2142 break;
2143 case PR_SET_MM: 2143 case PR_SET_MM:
2144 error = prctl_set_mm(arg2, arg3, arg4, arg5); 2144 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2145 break; 2145 break;
2146 case PR_GET_TID_ADDRESS: 2146 case PR_GET_TID_ADDRESS:
2147 error = prctl_get_tid_address(me, (int __user **)arg2); 2147 error = prctl_get_tid_address(me, (int __user **)arg2);
2148 break; 2148 break;
2149 case PR_SET_CHILD_SUBREAPER: 2149 case PR_SET_CHILD_SUBREAPER:
2150 me->signal->is_child_subreaper = !!arg2; 2150 me->signal->is_child_subreaper = !!arg2;
2151 break; 2151 break;
2152 case PR_GET_CHILD_SUBREAPER: 2152 case PR_GET_CHILD_SUBREAPER:
2153 error = put_user(me->signal->is_child_subreaper, 2153 error = put_user(me->signal->is_child_subreaper,
2154 (int __user *) arg2); 2154 (int __user *) arg2);
2155 break; 2155 break;
2156 case PR_SET_NO_NEW_PRIVS: 2156 case PR_SET_NO_NEW_PRIVS:
2157 if (arg2 != 1 || arg3 || arg4 || arg5) 2157 if (arg2 != 1 || arg3 || arg4 || arg5)
2158 return -EINVAL; 2158 return -EINVAL;
2159 2159
2160 current->no_new_privs = 1; 2160 current->no_new_privs = 1;
2161 break; 2161 break;
2162 case PR_GET_NO_NEW_PRIVS: 2162 case PR_GET_NO_NEW_PRIVS:
2163 if (arg2 || arg3 || arg4 || arg5) 2163 if (arg2 || arg3 || arg4 || arg5)
2164 return -EINVAL; 2164 return -EINVAL;
2165 return current->no_new_privs ? 1 : 0; 2165 return current->no_new_privs ? 1 : 0;
2166 default: 2166 default:
2167 error = -EINVAL; 2167 error = -EINVAL;
2168 break; 2168 break;
2169 } 2169 }
2170 return error; 2170 return error;
2171 } 2171 }
2172 2172
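Two of the simpler prctl options dispatched above, exercised from userspace; the thread name is arbitrary and gets truncated to sizeof(me->comm) - 1 characters exactly as the PR_SET_NAME branch enforces:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	char name[16];	/* same size as task_struct::comm */

	if (prctl(PR_SET_NAME, (unsigned long)"worker-0", 0, 0, 0) != 0)
		perror("PR_SET_NAME");
	if (prctl(PR_GET_NAME, (unsigned long)name, 0, 0, 0) != 0)
		perror("PR_GET_NAME");
	else
		printf("comm: %s\n", name);
	return 0;
}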
2173 SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, 2173 SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
2174 struct getcpu_cache __user *, unused) 2174 struct getcpu_cache __user *, unused)
2175 { 2175 {
2176 int err = 0; 2176 int err = 0;
2177 int cpu = raw_smp_processor_id(); 2177 int cpu = raw_smp_processor_id();
2178 if (cpup) 2178 if (cpup)
2179 err |= put_user(cpu, cpup); 2179 err |= put_user(cpu, cpup);
2180 if (nodep) 2180 if (nodep)
2181 err |= put_user(cpu_to_node(cpu), nodep); 2181 err |= put_user(cpu_to_node(cpu), nodep);
2182 return err ? -EFAULT : 0; 2182 return err ? -EFAULT : 0;
2183 } 2183 }
2184 2184
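getcpu() is normally reached through the vDSO/glibc wrapper (getcpu() since glibc 2.29, sched_getcpu() before that); the answer is only a hint because the task may migrate right after the call returns:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	unsigned int cpu, node;

	/* Either pointer may be NULL; the syscall above fills whichever is set. */
	if (getcpu(&cpu, &node) != 0) {
		perror("getcpu");
		return 1;
	}
	printf("running on cpu %u, node %u\n", cpu, node);
	return 0;
}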
2185 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 2185 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
2186 2186
2187 static void argv_cleanup(struct subprocess_info *info) 2187 static void argv_cleanup(struct subprocess_info *info)
2188 { 2188 {
2189 argv_free(info->argv); 2189 argv_free(info->argv);
2190 } 2190 }
2191 2191
2192 static int __orderly_poweroff(void) 2192 static int __orderly_poweroff(void)
2193 { 2193 {
2194 int argc; 2194 int argc;
2195 char **argv; 2195 char **argv;
2196 static char *envp[] = { 2196 static char *envp[] = {
2197 "HOME=/", 2197 "HOME=/",
2198 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", 2198 "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
2199 NULL 2199 NULL
2200 }; 2200 };
2201 int ret; 2201 int ret;
2202 2202
2203 argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); 2203 argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
2204 if (argv == NULL) { 2204 if (argv == NULL) {
2205 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", 2205 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
2206 __func__, poweroff_cmd); 2206 __func__, poweroff_cmd);
2207 return -ENOMEM; 2207 return -ENOMEM;
2208 } 2208 }
2209 2209
2210 ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, 2210 ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC,
2211 NULL, argv_cleanup, NULL); 2211 NULL, argv_cleanup, NULL);
2212 if (ret == -ENOMEM) 2212 if (ret == -ENOMEM)
2213 argv_free(argv); 2213 argv_free(argv);
2214 2214
2215 return ret; 2215 return ret;
2216 } 2216 }
2217 2217
2218 /** 2218 /**
2219 * orderly_poweroff - Trigger an orderly system poweroff 2219 * orderly_poweroff - Trigger an orderly system poweroff
2220 * @force: force poweroff if command execution fails 2220 * @force: force poweroff if command execution fails
2221 * 2221 *
2222 * This may be called from any context to trigger a system shutdown. 2222 * This may be called from any context to trigger a system shutdown.
2223 * If the orderly shutdown fails, it will force an immediate shutdown. 2223 * If the orderly shutdown fails, it will force an immediate shutdown.
2224 */ 2224 */
2225 int orderly_poweroff(bool force) 2225 int orderly_poweroff(bool force)
2226 { 2226 {
2227 int ret = __orderly_poweroff(); 2227 int ret = __orderly_poweroff();
2228 2228
2229 if (ret && force) { 2229 if (ret && force) {
2230 printk(KERN_WARNING "Failed to start orderly shutdown: " 2230 printk(KERN_WARNING "Failed to start orderly shutdown: "
2231 "forcing the issue\n"); 2231 "forcing the issue\n");
2232 2232
2233 /* 2233 /*
2234 * I guess this should try to kick off some daemon to sync and 2234 * I guess this should try to kick off some daemon to sync and
2235 * poweroff asap. Or not even bother syncing if we're doing an 2235 * poweroff asap. Or not even bother syncing if we're doing an
2236 * emergency shutdown? 2236 * emergency shutdown?
2237 */ 2237 */
2238 emergency_sync(); 2238 emergency_sync();
2239 kernel_power_off(); 2239 kernel_power_off();
2240 } 2240 }
2241 2241
2242 return ret; 2242 return ret;
2243 } 2243 }
2244 EXPORT_SYMBOL_GPL(orderly_poweroff); 2244 EXPORT_SYMBOL_GPL(orderly_poweroff);
2245 2245
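Since orderly_poweroff() is exported GPL, a driver can trigger the orderly path directly; a hedged in-kernel sketch where the overheat handler and its caller are hypothetical:

#include <linux/module.h>
#include <linux/reboot.h>

/* Hypothetical thermal handler: invoked when a sensor reports a critical trip point. */
void demo_critical_overheat(void)
{
	/*
	 * Run poweroff_cmd through the usermode helper; with force == true a
	 * failure falls back to emergency_sync() + kernel_power_off().
	 */
	orderly_poweroff(true);
}

MODULE_LICENSE("GPL");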