Commit e80d0a1ae8bb8fee0edd37427836f108b30f596b
1 parent: a634f93335
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
cputime: Rename thread_group_times to thread_group_cputime_adjusted
We have thread_group_cputime() and thread_group_times(). The naming doesn't provide enough information about the difference between these two APIs.

To lower the confusion, rename thread_group_times() to thread_group_cputime_adjusted(). This name better suggests that it's a version of thread_group_cputime() that does some stabilization on the raw cputime values, i.e. here: scaling on top of the CFS runtime stats and bounding the values from below for monotonicity.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
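The rename itself changes no behaviour. As a rough illustration of the "stabilization" the message describes, here is a small self-contained sketch in plain userspace C (hypothetical types and values, not the kernel's actual implementation): the tick-based user/system times are scaled so their sum matches the scheduler-accounted runtime, and the reported values are clamped so they never go backwards between two reads.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's raw and previously reported times. */
struct raw_cputime  { uint64_t utime, stime, sum_exec_runtime; };
struct prev_cputime { uint64_t utime, stime; };

/*
 * Sketch of the "adjusted" step: scale the tick-based utime/stime so their
 * sum matches the scheduler-accounted runtime, then enforce monotonicity
 * against the previously reported values.
 */
static void cputime_adjust_sketch(const struct raw_cputime *raw,
                                  struct prev_cputime *prev,
                                  uint64_t *ut, uint64_t *st)
{
	uint64_t rtime = raw->sum_exec_runtime;
	uint64_t total = raw->utime + raw->stime;
	uint64_t utime = total ? (raw->utime * rtime) / total : rtime;
	uint64_t stime;

	/* Never let the reported values go backwards between two reads. */
	if (utime > prev->utime)
		prev->utime = utime;
	stime = rtime > prev->utime ? rtime - prev->utime : 0;
	if (stime > prev->stime)
		prev->stime = stime;

	*ut = prev->utime;
	*st = prev->stime;
}

int main(void)
{
	struct raw_cputime raw = { .utime = 700, .stime = 300,
	                           .sum_exec_runtime = 1200 };
	struct prev_cputime prev = { 0, 0 };
	uint64_t ut, st;

	cputime_adjust_sketch(&raw, &prev, &ut, &st);
	printf("adjusted utime=%llu stime=%llu\n",
	       (unsigned long long)ut, (unsigned long long)st);
	return 0;
}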
Showing 5 changed files with 13 additions and 13 deletions
fs/proc/array.c
1 | /* | 1 | /* |
2 | * linux/fs/proc/array.c | 2 | * linux/fs/proc/array.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992 by Linus Torvalds | 4 | * Copyright (C) 1992 by Linus Torvalds |
5 | * based on ideas by Darren Senn | 5 | * based on ideas by Darren Senn |
6 | * | 6 | * |
7 | * Fixes: | 7 | * Fixes: |
8 | * Michael. K. Johnson: stat,statm extensions. | 8 | * Michael. K. Johnson: stat,statm extensions. |
9 | * <johnsonm@stolaf.edu> | 9 | * <johnsonm@stolaf.edu> |
10 | * | 10 | * |
11 | * Pauline Middelink : Made cmdline,envline only break at '\0's, to | 11 | * Pauline Middelink : Made cmdline,envline only break at '\0's, to |
12 | * make sure SET_PROCTITLE works. Also removed | 12 | * make sure SET_PROCTITLE works. Also removed |
13 | * bad '!' which forced address recalculation for | 13 | * bad '!' which forced address recalculation for |
14 | * EVERY character on the current page. | 14 | * EVERY character on the current page. |
15 | * <middelin@polyware.iaf.nl> | 15 | * <middelin@polyware.iaf.nl> |
16 | * | 16 | * |
17 | * Danny ter Haar : added cpuinfo | 17 | * Danny ter Haar : added cpuinfo |
18 | * <dth@cistron.nl> | 18 | * <dth@cistron.nl> |
19 | * | 19 | * |
20 | * Alessandro Rubini : profile extension. | 20 | * Alessandro Rubini : profile extension. |
21 | * <rubini@ipvvis.unipv.it> | 21 | * <rubini@ipvvis.unipv.it> |
22 | * | 22 | * |
23 | * Jeff Tranter : added BogoMips field to cpuinfo | 23 | * Jeff Tranter : added BogoMips field to cpuinfo |
24 | * <Jeff_Tranter@Mitel.COM> | 24 | * <Jeff_Tranter@Mitel.COM> |
25 | * | 25 | * |
26 | * Bruno Haible : remove 4K limit for the maps file | 26 | * Bruno Haible : remove 4K limit for the maps file |
27 | * <haible@ma2s2.mathematik.uni-karlsruhe.de> | 27 | * <haible@ma2s2.mathematik.uni-karlsruhe.de> |
28 | * | 28 | * |
29 | * Yves Arrouye : remove removal of trailing spaces in get_array. | 29 | * Yves Arrouye : remove removal of trailing spaces in get_array. |
30 | * <Yves.Arrouye@marin.fdn.fr> | 30 | * <Yves.Arrouye@marin.fdn.fr> |
31 | * | 31 | * |
32 | * Jerome Forissier : added per-CPU time information to /proc/stat | 32 | * Jerome Forissier : added per-CPU time information to /proc/stat |
33 | * and /proc/<pid>/cpu extension | 33 | * and /proc/<pid>/cpu extension |
34 | * <forissier@isia.cma.fr> | 34 | * <forissier@isia.cma.fr> |
35 | * - Incorporation and non-SMP safe operation | 35 | * - Incorporation and non-SMP safe operation |
36 | * of forissier patch in 2.1.78 by | 36 | * of forissier patch in 2.1.78 by |
37 | * Hans Marcus <crowbar@concepts.nl> | 37 | * Hans Marcus <crowbar@concepts.nl> |
38 | * | 38 | * |
39 | * aeb@cwi.nl : /proc/partitions | 39 | * aeb@cwi.nl : /proc/partitions |
40 | * | 40 | * |
41 | * | 41 | * |
42 | * Alan Cox : security fixes. | 42 | * Alan Cox : security fixes. |
43 | * <alan@lxorguk.ukuu.org.uk> | 43 | * <alan@lxorguk.ukuu.org.uk> |
44 | * | 44 | * |
45 | * Al Viro : safe handling of mm_struct | 45 | * Al Viro : safe handling of mm_struct |
46 | * | 46 | * |
47 | * Gerhard Wichert : added BIGMEM support | 47 | * Gerhard Wichert : added BIGMEM support |
48 | * Siemens AG <Gerhard.Wichert@pdb.siemens.de> | 48 | * Siemens AG <Gerhard.Wichert@pdb.siemens.de> |
49 | * | 49 | * |
50 | * Al Viro & Jeff Garzik : moved most of the thing into base.c and | 50 | * Al Viro & Jeff Garzik : moved most of the thing into base.c and |
51 | * : proc_misc.c. The rest may eventually go into | 51 | * : proc_misc.c. The rest may eventually go into |
52 | * : base.c too. | 52 | * : base.c too. |
53 | */ | 53 | */ |
54 | 54 | ||
55 | #include <linux/types.h> | 55 | #include <linux/types.h> |
56 | #include <linux/errno.h> | 56 | #include <linux/errno.h> |
57 | #include <linux/time.h> | 57 | #include <linux/time.h> |
58 | #include <linux/kernel.h> | 58 | #include <linux/kernel.h> |
59 | #include <linux/kernel_stat.h> | 59 | #include <linux/kernel_stat.h> |
60 | #include <linux/tty.h> | 60 | #include <linux/tty.h> |
61 | #include <linux/string.h> | 61 | #include <linux/string.h> |
62 | #include <linux/mman.h> | 62 | #include <linux/mman.h> |
63 | #include <linux/proc_fs.h> | 63 | #include <linux/proc_fs.h> |
64 | #include <linux/ioport.h> | 64 | #include <linux/ioport.h> |
65 | #include <linux/uaccess.h> | 65 | #include <linux/uaccess.h> |
66 | #include <linux/io.h> | 66 | #include <linux/io.h> |
67 | #include <linux/mm.h> | 67 | #include <linux/mm.h> |
68 | #include <linux/hugetlb.h> | 68 | #include <linux/hugetlb.h> |
69 | #include <linux/pagemap.h> | 69 | #include <linux/pagemap.h> |
70 | #include <linux/swap.h> | 70 | #include <linux/swap.h> |
71 | #include <linux/smp.h> | 71 | #include <linux/smp.h> |
72 | #include <linux/signal.h> | 72 | #include <linux/signal.h> |
73 | #include <linux/highmem.h> | 73 | #include <linux/highmem.h> |
74 | #include <linux/file.h> | 74 | #include <linux/file.h> |
75 | #include <linux/fdtable.h> | 75 | #include <linux/fdtable.h> |
76 | #include <linux/times.h> | 76 | #include <linux/times.h> |
77 | #include <linux/cpuset.h> | 77 | #include <linux/cpuset.h> |
78 | #include <linux/rcupdate.h> | 78 | #include <linux/rcupdate.h> |
79 | #include <linux/delayacct.h> | 79 | #include <linux/delayacct.h> |
80 | #include <linux/seq_file.h> | 80 | #include <linux/seq_file.h> |
81 | #include <linux/pid_namespace.h> | 81 | #include <linux/pid_namespace.h> |
82 | #include <linux/ptrace.h> | 82 | #include <linux/ptrace.h> |
83 | #include <linux/tracehook.h> | 83 | #include <linux/tracehook.h> |
84 | #include <linux/user_namespace.h> | 84 | #include <linux/user_namespace.h> |
85 | 85 | ||
86 | #include <asm/pgtable.h> | 86 | #include <asm/pgtable.h> |
87 | #include <asm/processor.h> | 87 | #include <asm/processor.h> |
88 | #include "internal.h" | 88 | #include "internal.h" |
89 | 89 | ||
90 | static inline void task_name(struct seq_file *m, struct task_struct *p) | 90 | static inline void task_name(struct seq_file *m, struct task_struct *p) |
91 | { | 91 | { |
92 | int i; | 92 | int i; |
93 | char *buf, *end; | 93 | char *buf, *end; |
94 | char *name; | 94 | char *name; |
95 | char tcomm[sizeof(p->comm)]; | 95 | char tcomm[sizeof(p->comm)]; |
96 | 96 | ||
97 | get_task_comm(tcomm, p); | 97 | get_task_comm(tcomm, p); |
98 | 98 | ||
99 | seq_puts(m, "Name:\t"); | 99 | seq_puts(m, "Name:\t"); |
100 | end = m->buf + m->size; | 100 | end = m->buf + m->size; |
101 | buf = m->buf + m->count; | 101 | buf = m->buf + m->count; |
102 | name = tcomm; | 102 | name = tcomm; |
103 | i = sizeof(tcomm); | 103 | i = sizeof(tcomm); |
104 | while (i && (buf < end)) { | 104 | while (i && (buf < end)) { |
105 | unsigned char c = *name; | 105 | unsigned char c = *name; |
106 | name++; | 106 | name++; |
107 | i--; | 107 | i--; |
108 | *buf = c; | 108 | *buf = c; |
109 | if (!c) | 109 | if (!c) |
110 | break; | 110 | break; |
111 | if (c == '\\') { | 111 | if (c == '\\') { |
112 | buf++; | 112 | buf++; |
113 | if (buf < end) | 113 | if (buf < end) |
114 | *buf++ = c; | 114 | *buf++ = c; |
115 | continue; | 115 | continue; |
116 | } | 116 | } |
117 | if (c == '\n') { | 117 | if (c == '\n') { |
118 | *buf++ = '\\'; | 118 | *buf++ = '\\'; |
119 | if (buf < end) | 119 | if (buf < end) |
120 | *buf++ = 'n'; | 120 | *buf++ = 'n'; |
121 | continue; | 121 | continue; |
122 | } | 122 | } |
123 | buf++; | 123 | buf++; |
124 | } | 124 | } |
125 | m->count = buf - m->buf; | 125 | m->count = buf - m->buf; |
126 | seq_putc(m, '\n'); | 126 | seq_putc(m, '\n'); |
127 | } | 127 | } |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * The task state array is a strange "bitmap" of | 130 | * The task state array is a strange "bitmap" of |
131 | * reasons to sleep. Thus "running" is zero, and | 131 | * reasons to sleep. Thus "running" is zero, and |
132 | * you can test for combinations of others with | 132 | * you can test for combinations of others with |
133 | * simple bit tests. | 133 | * simple bit tests. |
134 | */ | 134 | */ |
135 | static const char * const task_state_array[] = { | 135 | static const char * const task_state_array[] = { |
136 | "R (running)", /* 0 */ | 136 | "R (running)", /* 0 */ |
137 | "S (sleeping)", /* 1 */ | 137 | "S (sleeping)", /* 1 */ |
138 | "D (disk sleep)", /* 2 */ | 138 | "D (disk sleep)", /* 2 */ |
139 | "T (stopped)", /* 4 */ | 139 | "T (stopped)", /* 4 */ |
140 | "t (tracing stop)", /* 8 */ | 140 | "t (tracing stop)", /* 8 */ |
141 | "Z (zombie)", /* 16 */ | 141 | "Z (zombie)", /* 16 */ |
142 | "X (dead)", /* 32 */ | 142 | "X (dead)", /* 32 */ |
143 | "x (dead)", /* 64 */ | 143 | "x (dead)", /* 64 */ |
144 | "K (wakekill)", /* 128 */ | 144 | "K (wakekill)", /* 128 */ |
145 | "W (waking)", /* 256 */ | 145 | "W (waking)", /* 256 */ |
146 | }; | 146 | }; |
147 | 147 | ||
148 | static inline const char *get_task_state(struct task_struct *tsk) | 148 | static inline const char *get_task_state(struct task_struct *tsk) |
149 | { | 149 | { |
150 | unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; | 150 | unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; |
151 | const char * const *p = &task_state_array[0]; | 151 | const char * const *p = &task_state_array[0]; |
152 | 152 | ||
153 | BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); | 153 | BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); |
154 | 154 | ||
155 | while (state) { | 155 | while (state) { |
156 | p++; | 156 | p++; |
157 | state >>= 1; | 157 | state >>= 1; |
158 | } | 158 | } |
159 | return *p; | 159 | return *p; |
160 | } | 160 | } |
161 | 161 | ||
162 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | 162 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, |
163 | struct pid *pid, struct task_struct *p) | 163 | struct pid *pid, struct task_struct *p) |
164 | { | 164 | { |
165 | struct user_namespace *user_ns = current_user_ns(); | 165 | struct user_namespace *user_ns = current_user_ns(); |
166 | struct group_info *group_info; | 166 | struct group_info *group_info; |
167 | int g; | 167 | int g; |
168 | struct fdtable *fdt = NULL; | 168 | struct fdtable *fdt = NULL; |
169 | const struct cred *cred; | 169 | const struct cred *cred; |
170 | pid_t ppid, tpid; | 170 | pid_t ppid, tpid; |
171 | 171 | ||
172 | rcu_read_lock(); | 172 | rcu_read_lock(); |
173 | ppid = pid_alive(p) ? | 173 | ppid = pid_alive(p) ? |
174 | task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; | 174 | task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; |
175 | tpid = 0; | 175 | tpid = 0; |
176 | if (pid_alive(p)) { | 176 | if (pid_alive(p)) { |
177 | struct task_struct *tracer = ptrace_parent(p); | 177 | struct task_struct *tracer = ptrace_parent(p); |
178 | if (tracer) | 178 | if (tracer) |
179 | tpid = task_pid_nr_ns(tracer, ns); | 179 | tpid = task_pid_nr_ns(tracer, ns); |
180 | } | 180 | } |
181 | cred = get_task_cred(p); | 181 | cred = get_task_cred(p); |
182 | seq_printf(m, | 182 | seq_printf(m, |
183 | "State:\t%s\n" | 183 | "State:\t%s\n" |
184 | "Tgid:\t%d\n" | 184 | "Tgid:\t%d\n" |
185 | "Pid:\t%d\n" | 185 | "Pid:\t%d\n" |
186 | "PPid:\t%d\n" | 186 | "PPid:\t%d\n" |
187 | "TracerPid:\t%d\n" | 187 | "TracerPid:\t%d\n" |
188 | "Uid:\t%d\t%d\t%d\t%d\n" | 188 | "Uid:\t%d\t%d\t%d\t%d\n" |
189 | "Gid:\t%d\t%d\t%d\t%d\n", | 189 | "Gid:\t%d\t%d\t%d\t%d\n", |
190 | get_task_state(p), | 190 | get_task_state(p), |
191 | task_tgid_nr_ns(p, ns), | 191 | task_tgid_nr_ns(p, ns), |
192 | pid_nr_ns(pid, ns), | 192 | pid_nr_ns(pid, ns), |
193 | ppid, tpid, | 193 | ppid, tpid, |
194 | from_kuid_munged(user_ns, cred->uid), | 194 | from_kuid_munged(user_ns, cred->uid), |
195 | from_kuid_munged(user_ns, cred->euid), | 195 | from_kuid_munged(user_ns, cred->euid), |
196 | from_kuid_munged(user_ns, cred->suid), | 196 | from_kuid_munged(user_ns, cred->suid), |
197 | from_kuid_munged(user_ns, cred->fsuid), | 197 | from_kuid_munged(user_ns, cred->fsuid), |
198 | from_kgid_munged(user_ns, cred->gid), | 198 | from_kgid_munged(user_ns, cred->gid), |
199 | from_kgid_munged(user_ns, cred->egid), | 199 | from_kgid_munged(user_ns, cred->egid), |
200 | from_kgid_munged(user_ns, cred->sgid), | 200 | from_kgid_munged(user_ns, cred->sgid), |
201 | from_kgid_munged(user_ns, cred->fsgid)); | 201 | from_kgid_munged(user_ns, cred->fsgid)); |
202 | 202 | ||
203 | task_lock(p); | 203 | task_lock(p); |
204 | if (p->files) | 204 | if (p->files) |
205 | fdt = files_fdtable(p->files); | 205 | fdt = files_fdtable(p->files); |
206 | seq_printf(m, | 206 | seq_printf(m, |
207 | "FDSize:\t%d\n" | 207 | "FDSize:\t%d\n" |
208 | "Groups:\t", | 208 | "Groups:\t", |
209 | fdt ? fdt->max_fds : 0); | 209 | fdt ? fdt->max_fds : 0); |
210 | rcu_read_unlock(); | 210 | rcu_read_unlock(); |
211 | 211 | ||
212 | group_info = cred->group_info; | 212 | group_info = cred->group_info; |
213 | task_unlock(p); | 213 | task_unlock(p); |
214 | 214 | ||
215 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) | 215 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) |
216 | seq_printf(m, "%d ", | 216 | seq_printf(m, "%d ", |
217 | from_kgid_munged(user_ns, GROUP_AT(group_info, g))); | 217 | from_kgid_munged(user_ns, GROUP_AT(group_info, g))); |
218 | put_cred(cred); | 218 | put_cred(cred); |
219 | 219 | ||
220 | seq_putc(m, '\n'); | 220 | seq_putc(m, '\n'); |
221 | } | 221 | } |
222 | 222 | ||
223 | static void render_sigset_t(struct seq_file *m, const char *header, | 223 | static void render_sigset_t(struct seq_file *m, const char *header, |
224 | sigset_t *set) | 224 | sigset_t *set) |
225 | { | 225 | { |
226 | int i; | 226 | int i; |
227 | 227 | ||
228 | seq_puts(m, header); | 228 | seq_puts(m, header); |
229 | 229 | ||
230 | i = _NSIG; | 230 | i = _NSIG; |
231 | do { | 231 | do { |
232 | int x = 0; | 232 | int x = 0; |
233 | 233 | ||
234 | i -= 4; | 234 | i -= 4; |
235 | if (sigismember(set, i+1)) x |= 1; | 235 | if (sigismember(set, i+1)) x |= 1; |
236 | if (sigismember(set, i+2)) x |= 2; | 236 | if (sigismember(set, i+2)) x |= 2; |
237 | if (sigismember(set, i+3)) x |= 4; | 237 | if (sigismember(set, i+3)) x |= 4; |
238 | if (sigismember(set, i+4)) x |= 8; | 238 | if (sigismember(set, i+4)) x |= 8; |
239 | seq_printf(m, "%x", x); | 239 | seq_printf(m, "%x", x); |
240 | } while (i >= 4); | 240 | } while (i >= 4); |
241 | 241 | ||
242 | seq_putc(m, '\n'); | 242 | seq_putc(m, '\n'); |
243 | } | 243 | } |
244 | 244 | ||
245 | static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, | 245 | static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, |
246 | sigset_t *catch) | 246 | sigset_t *catch) |
247 | { | 247 | { |
248 | struct k_sigaction *k; | 248 | struct k_sigaction *k; |
249 | int i; | 249 | int i; |
250 | 250 | ||
251 | k = p->sighand->action; | 251 | k = p->sighand->action; |
252 | for (i = 1; i <= _NSIG; ++i, ++k) { | 252 | for (i = 1; i <= _NSIG; ++i, ++k) { |
253 | if (k->sa.sa_handler == SIG_IGN) | 253 | if (k->sa.sa_handler == SIG_IGN) |
254 | sigaddset(ign, i); | 254 | sigaddset(ign, i); |
255 | else if (k->sa.sa_handler != SIG_DFL) | 255 | else if (k->sa.sa_handler != SIG_DFL) |
256 | sigaddset(catch, i); | 256 | sigaddset(catch, i); |
257 | } | 257 | } |
258 | } | 258 | } |
259 | 259 | ||
260 | static inline void task_sig(struct seq_file *m, struct task_struct *p) | 260 | static inline void task_sig(struct seq_file *m, struct task_struct *p) |
261 | { | 261 | { |
262 | unsigned long flags; | 262 | unsigned long flags; |
263 | sigset_t pending, shpending, blocked, ignored, caught; | 263 | sigset_t pending, shpending, blocked, ignored, caught; |
264 | int num_threads = 0; | 264 | int num_threads = 0; |
265 | unsigned long qsize = 0; | 265 | unsigned long qsize = 0; |
266 | unsigned long qlim = 0; | 266 | unsigned long qlim = 0; |
267 | 267 | ||
268 | sigemptyset(&pending); | 268 | sigemptyset(&pending); |
269 | sigemptyset(&shpending); | 269 | sigemptyset(&shpending); |
270 | sigemptyset(&blocked); | 270 | sigemptyset(&blocked); |
271 | sigemptyset(&ignored); | 271 | sigemptyset(&ignored); |
272 | sigemptyset(&caught); | 272 | sigemptyset(&caught); |
273 | 273 | ||
274 | if (lock_task_sighand(p, &flags)) { | 274 | if (lock_task_sighand(p, &flags)) { |
275 | pending = p->pending.signal; | 275 | pending = p->pending.signal; |
276 | shpending = p->signal->shared_pending.signal; | 276 | shpending = p->signal->shared_pending.signal; |
277 | blocked = p->blocked; | 277 | blocked = p->blocked; |
278 | collect_sigign_sigcatch(p, &ignored, &caught); | 278 | collect_sigign_sigcatch(p, &ignored, &caught); |
279 | num_threads = get_nr_threads(p); | 279 | num_threads = get_nr_threads(p); |
280 | rcu_read_lock(); /* FIXME: is this correct? */ | 280 | rcu_read_lock(); /* FIXME: is this correct? */ |
281 | qsize = atomic_read(&__task_cred(p)->user->sigpending); | 281 | qsize = atomic_read(&__task_cred(p)->user->sigpending); |
282 | rcu_read_unlock(); | 282 | rcu_read_unlock(); |
283 | qlim = task_rlimit(p, RLIMIT_SIGPENDING); | 283 | qlim = task_rlimit(p, RLIMIT_SIGPENDING); |
284 | unlock_task_sighand(p, &flags); | 284 | unlock_task_sighand(p, &flags); |
285 | } | 285 | } |
286 | 286 | ||
287 | seq_printf(m, "Threads:\t%d\n", num_threads); | 287 | seq_printf(m, "Threads:\t%d\n", num_threads); |
288 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); | 288 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); |
289 | 289 | ||
290 | /* render them all */ | 290 | /* render them all */ |
291 | render_sigset_t(m, "SigPnd:\t", &pending); | 291 | render_sigset_t(m, "SigPnd:\t", &pending); |
292 | render_sigset_t(m, "ShdPnd:\t", &shpending); | 292 | render_sigset_t(m, "ShdPnd:\t", &shpending); |
293 | render_sigset_t(m, "SigBlk:\t", &blocked); | 293 | render_sigset_t(m, "SigBlk:\t", &blocked); |
294 | render_sigset_t(m, "SigIgn:\t", &ignored); | 294 | render_sigset_t(m, "SigIgn:\t", &ignored); |
295 | render_sigset_t(m, "SigCgt:\t", &caught); | 295 | render_sigset_t(m, "SigCgt:\t", &caught); |
296 | } | 296 | } |
297 | 297 | ||
298 | static void render_cap_t(struct seq_file *m, const char *header, | 298 | static void render_cap_t(struct seq_file *m, const char *header, |
299 | kernel_cap_t *a) | 299 | kernel_cap_t *a) |
300 | { | 300 | { |
301 | unsigned __capi; | 301 | unsigned __capi; |
302 | 302 | ||
303 | seq_puts(m, header); | 303 | seq_puts(m, header); |
304 | CAP_FOR_EACH_U32(__capi) { | 304 | CAP_FOR_EACH_U32(__capi) { |
305 | seq_printf(m, "%08x", | 305 | seq_printf(m, "%08x", |
306 | a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); | 306 | a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); |
307 | } | 307 | } |
308 | seq_putc(m, '\n'); | 308 | seq_putc(m, '\n'); |
309 | } | 309 | } |
310 | 310 | ||
311 | static inline void task_cap(struct seq_file *m, struct task_struct *p) | 311 | static inline void task_cap(struct seq_file *m, struct task_struct *p) |
312 | { | 312 | { |
313 | const struct cred *cred; | 313 | const struct cred *cred; |
314 | kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset; | 314 | kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset; |
315 | 315 | ||
316 | rcu_read_lock(); | 316 | rcu_read_lock(); |
317 | cred = __task_cred(p); | 317 | cred = __task_cred(p); |
318 | cap_inheritable = cred->cap_inheritable; | 318 | cap_inheritable = cred->cap_inheritable; |
319 | cap_permitted = cred->cap_permitted; | 319 | cap_permitted = cred->cap_permitted; |
320 | cap_effective = cred->cap_effective; | 320 | cap_effective = cred->cap_effective; |
321 | cap_bset = cred->cap_bset; | 321 | cap_bset = cred->cap_bset; |
322 | rcu_read_unlock(); | 322 | rcu_read_unlock(); |
323 | 323 | ||
324 | render_cap_t(m, "CapInh:\t", &cap_inheritable); | 324 | render_cap_t(m, "CapInh:\t", &cap_inheritable); |
325 | render_cap_t(m, "CapPrm:\t", &cap_permitted); | 325 | render_cap_t(m, "CapPrm:\t", &cap_permitted); |
326 | render_cap_t(m, "CapEff:\t", &cap_effective); | 326 | render_cap_t(m, "CapEff:\t", &cap_effective); |
327 | render_cap_t(m, "CapBnd:\t", &cap_bset); | 327 | render_cap_t(m, "CapBnd:\t", &cap_bset); |
328 | } | 328 | } |
329 | 329 | ||
330 | static inline void task_context_switch_counts(struct seq_file *m, | 330 | static inline void task_context_switch_counts(struct seq_file *m, |
331 | struct task_struct *p) | 331 | struct task_struct *p) |
332 | { | 332 | { |
333 | seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" | 333 | seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" |
334 | "nonvoluntary_ctxt_switches:\t%lu\n", | 334 | "nonvoluntary_ctxt_switches:\t%lu\n", |
335 | p->nvcsw, | 335 | p->nvcsw, |
336 | p->nivcsw); | 336 | p->nivcsw); |
337 | } | 337 | } |
338 | 338 | ||
339 | static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) | 339 | static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) |
340 | { | 340 | { |
341 | seq_puts(m, "Cpus_allowed:\t"); | 341 | seq_puts(m, "Cpus_allowed:\t"); |
342 | seq_cpumask(m, &task->cpus_allowed); | 342 | seq_cpumask(m, &task->cpus_allowed); |
343 | seq_putc(m, '\n'); | 343 | seq_putc(m, '\n'); |
344 | seq_puts(m, "Cpus_allowed_list:\t"); | 344 | seq_puts(m, "Cpus_allowed_list:\t"); |
345 | seq_cpumask_list(m, &task->cpus_allowed); | 345 | seq_cpumask_list(m, &task->cpus_allowed); |
346 | seq_putc(m, '\n'); | 346 | seq_putc(m, '\n'); |
347 | } | 347 | } |
348 | 348 | ||
349 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 349 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, |
350 | struct pid *pid, struct task_struct *task) | 350 | struct pid *pid, struct task_struct *task) |
351 | { | 351 | { |
352 | struct mm_struct *mm = get_task_mm(task); | 352 | struct mm_struct *mm = get_task_mm(task); |
353 | 353 | ||
354 | task_name(m, task); | 354 | task_name(m, task); |
355 | task_state(m, ns, pid, task); | 355 | task_state(m, ns, pid, task); |
356 | 356 | ||
357 | if (mm) { | 357 | if (mm) { |
358 | task_mem(m, mm); | 358 | task_mem(m, mm); |
359 | mmput(mm); | 359 | mmput(mm); |
360 | } | 360 | } |
361 | task_sig(m, task); | 361 | task_sig(m, task); |
362 | task_cap(m, task); | 362 | task_cap(m, task); |
363 | task_cpus_allowed(m, task); | 363 | task_cpus_allowed(m, task); |
364 | cpuset_task_status_allowed(m, task); | 364 | cpuset_task_status_allowed(m, task); |
365 | task_context_switch_counts(m, task); | 365 | task_context_switch_counts(m, task); |
366 | return 0; | 366 | return 0; |
367 | } | 367 | } |
368 | 368 | ||
369 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 369 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
370 | struct pid *pid, struct task_struct *task, int whole) | 370 | struct pid *pid, struct task_struct *task, int whole) |
371 | { | 371 | { |
372 | unsigned long vsize, eip, esp, wchan = ~0UL; | 372 | unsigned long vsize, eip, esp, wchan = ~0UL; |
373 | int priority, nice; | 373 | int priority, nice; |
374 | int tty_pgrp = -1, tty_nr = 0; | 374 | int tty_pgrp = -1, tty_nr = 0; |
375 | sigset_t sigign, sigcatch; | 375 | sigset_t sigign, sigcatch; |
376 | char state; | 376 | char state; |
377 | pid_t ppid = 0, pgid = -1, sid = -1; | 377 | pid_t ppid = 0, pgid = -1, sid = -1; |
378 | int num_threads = 0; | 378 | int num_threads = 0; |
379 | int permitted; | 379 | int permitted; |
380 | struct mm_struct *mm; | 380 | struct mm_struct *mm; |
381 | unsigned long long start_time; | 381 | unsigned long long start_time; |
382 | unsigned long cmin_flt = 0, cmaj_flt = 0; | 382 | unsigned long cmin_flt = 0, cmaj_flt = 0; |
383 | unsigned long min_flt = 0, maj_flt = 0; | 383 | unsigned long min_flt = 0, maj_flt = 0; |
384 | cputime_t cutime, cstime, utime, stime; | 384 | cputime_t cutime, cstime, utime, stime; |
385 | cputime_t cgtime, gtime; | 385 | cputime_t cgtime, gtime; |
386 | unsigned long rsslim = 0; | 386 | unsigned long rsslim = 0; |
387 | char tcomm[sizeof(task->comm)]; | 387 | char tcomm[sizeof(task->comm)]; |
388 | unsigned long flags; | 388 | unsigned long flags; |
389 | 389 | ||
390 | state = *get_task_state(task); | 390 | state = *get_task_state(task); |
391 | vsize = eip = esp = 0; | 391 | vsize = eip = esp = 0; |
392 | permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); | 392 | permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); |
393 | mm = get_task_mm(task); | 393 | mm = get_task_mm(task); |
394 | if (mm) { | 394 | if (mm) { |
395 | vsize = task_vsize(mm); | 395 | vsize = task_vsize(mm); |
396 | if (permitted) { | 396 | if (permitted) { |
397 | eip = KSTK_EIP(task); | 397 | eip = KSTK_EIP(task); |
398 | esp = KSTK_ESP(task); | 398 | esp = KSTK_ESP(task); |
399 | } | 399 | } |
400 | } | 400 | } |
401 | 401 | ||
402 | get_task_comm(tcomm, task); | 402 | get_task_comm(tcomm, task); |
403 | 403 | ||
404 | sigemptyset(&sigign); | 404 | sigemptyset(&sigign); |
405 | sigemptyset(&sigcatch); | 405 | sigemptyset(&sigcatch); |
406 | cutime = cstime = utime = stime = 0; | 406 | cutime = cstime = utime = stime = 0; |
407 | cgtime = gtime = 0; | 407 | cgtime = gtime = 0; |
408 | 408 | ||
409 | if (lock_task_sighand(task, &flags)) { | 409 | if (lock_task_sighand(task, &flags)) { |
410 | struct signal_struct *sig = task->signal; | 410 | struct signal_struct *sig = task->signal; |
411 | 411 | ||
412 | if (sig->tty) { | 412 | if (sig->tty) { |
413 | struct pid *pgrp = tty_get_pgrp(sig->tty); | 413 | struct pid *pgrp = tty_get_pgrp(sig->tty); |
414 | tty_pgrp = pid_nr_ns(pgrp, ns); | 414 | tty_pgrp = pid_nr_ns(pgrp, ns); |
415 | put_pid(pgrp); | 415 | put_pid(pgrp); |
416 | tty_nr = new_encode_dev(tty_devnum(sig->tty)); | 416 | tty_nr = new_encode_dev(tty_devnum(sig->tty)); |
417 | } | 417 | } |
418 | 418 | ||
419 | num_threads = get_nr_threads(task); | 419 | num_threads = get_nr_threads(task); |
420 | collect_sigign_sigcatch(task, &sigign, &sigcatch); | 420 | collect_sigign_sigcatch(task, &sigign, &sigcatch); |
421 | 421 | ||
422 | cmin_flt = sig->cmin_flt; | 422 | cmin_flt = sig->cmin_flt; |
423 | cmaj_flt = sig->cmaj_flt; | 423 | cmaj_flt = sig->cmaj_flt; |
424 | cutime = sig->cutime; | 424 | cutime = sig->cutime; |
425 | cstime = sig->cstime; | 425 | cstime = sig->cstime; |
426 | cgtime = sig->cgtime; | 426 | cgtime = sig->cgtime; |
427 | rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); | 427 | rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); |
428 | 428 | ||
429 | /* add up live thread stats at the group level */ | 429 | /* add up live thread stats at the group level */ |
430 | if (whole) { | 430 | if (whole) { |
431 | struct task_struct *t = task; | 431 | struct task_struct *t = task; |
432 | do { | 432 | do { |
433 | min_flt += t->min_flt; | 433 | min_flt += t->min_flt; |
434 | maj_flt += t->maj_flt; | 434 | maj_flt += t->maj_flt; |
435 | gtime += t->gtime; | 435 | gtime += t->gtime; |
436 | t = next_thread(t); | 436 | t = next_thread(t); |
437 | } while (t != task); | 437 | } while (t != task); |
438 | 438 | ||
439 | min_flt += sig->min_flt; | 439 | min_flt += sig->min_flt; |
440 | maj_flt += sig->maj_flt; | 440 | maj_flt += sig->maj_flt; |
441 | thread_group_times(task, &utime, &stime); | 441 | thread_group_cputime_adjusted(task, &utime, &stime); |
442 | gtime += sig->gtime; | 442 | gtime += sig->gtime; |
443 | } | 443 | } |
444 | 444 | ||
445 | sid = task_session_nr_ns(task, ns); | 445 | sid = task_session_nr_ns(task, ns); |
446 | ppid = task_tgid_nr_ns(task->real_parent, ns); | 446 | ppid = task_tgid_nr_ns(task->real_parent, ns); |
447 | pgid = task_pgrp_nr_ns(task, ns); | 447 | pgid = task_pgrp_nr_ns(task, ns); |
448 | 448 | ||
449 | unlock_task_sighand(task, &flags); | 449 | unlock_task_sighand(task, &flags); |
450 | } | 450 | } |
451 | 451 | ||
452 | if (permitted && (!whole || num_threads < 2)) | 452 | if (permitted && (!whole || num_threads < 2)) |
453 | wchan = get_wchan(task); | 453 | wchan = get_wchan(task); |
454 | if (!whole) { | 454 | if (!whole) { |
455 | min_flt = task->min_flt; | 455 | min_flt = task->min_flt; |
456 | maj_flt = task->maj_flt; | 456 | maj_flt = task->maj_flt; |
457 | task_times(task, &utime, &stime); | 457 | task_cputime_adjusted(task, &utime, &stime); |
458 | gtime = task->gtime; | 458 | gtime = task->gtime; |
459 | } | 459 | } |
460 | 460 | ||
461 | /* scale priority and nice values from timeslices to -20..20 */ | 461 | /* scale priority and nice values from timeslices to -20..20 */ |
462 | /* to make it look like a "normal" Unix priority/nice value */ | 462 | /* to make it look like a "normal" Unix priority/nice value */ |
463 | priority = task_prio(task); | 463 | priority = task_prio(task); |
464 | nice = task_nice(task); | 464 | nice = task_nice(task); |
465 | 465 | ||
466 | /* Temporary variable needed for gcc-2.96 */ | 466 | /* Temporary variable needed for gcc-2.96 */ |
467 | /* convert timespec -> nsec*/ | 467 | /* convert timespec -> nsec*/ |
468 | start_time = | 468 | start_time = |
469 | (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC | 469 | (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC |
470 | + task->real_start_time.tv_nsec; | 470 | + task->real_start_time.tv_nsec; |
471 | /* convert nsec -> ticks */ | 471 | /* convert nsec -> ticks */ |
472 | start_time = nsec_to_clock_t(start_time); | 472 | start_time = nsec_to_clock_t(start_time); |
473 | 473 | ||
474 | seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); | 474 | seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); |
475 | seq_put_decimal_ll(m, ' ', ppid); | 475 | seq_put_decimal_ll(m, ' ', ppid); |
476 | seq_put_decimal_ll(m, ' ', pgid); | 476 | seq_put_decimal_ll(m, ' ', pgid); |
477 | seq_put_decimal_ll(m, ' ', sid); | 477 | seq_put_decimal_ll(m, ' ', sid); |
478 | seq_put_decimal_ll(m, ' ', tty_nr); | 478 | seq_put_decimal_ll(m, ' ', tty_nr); |
479 | seq_put_decimal_ll(m, ' ', tty_pgrp); | 479 | seq_put_decimal_ll(m, ' ', tty_pgrp); |
480 | seq_put_decimal_ull(m, ' ', task->flags); | 480 | seq_put_decimal_ull(m, ' ', task->flags); |
481 | seq_put_decimal_ull(m, ' ', min_flt); | 481 | seq_put_decimal_ull(m, ' ', min_flt); |
482 | seq_put_decimal_ull(m, ' ', cmin_flt); | 482 | seq_put_decimal_ull(m, ' ', cmin_flt); |
483 | seq_put_decimal_ull(m, ' ', maj_flt); | 483 | seq_put_decimal_ull(m, ' ', maj_flt); |
484 | seq_put_decimal_ull(m, ' ', cmaj_flt); | 484 | seq_put_decimal_ull(m, ' ', cmaj_flt); |
485 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); | 485 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); |
486 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); | 486 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); |
487 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); | 487 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); |
488 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); | 488 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); |
489 | seq_put_decimal_ll(m, ' ', priority); | 489 | seq_put_decimal_ll(m, ' ', priority); |
490 | seq_put_decimal_ll(m, ' ', nice); | 490 | seq_put_decimal_ll(m, ' ', nice); |
491 | seq_put_decimal_ll(m, ' ', num_threads); | 491 | seq_put_decimal_ll(m, ' ', num_threads); |
492 | seq_put_decimal_ull(m, ' ', 0); | 492 | seq_put_decimal_ull(m, ' ', 0); |
493 | seq_put_decimal_ull(m, ' ', start_time); | 493 | seq_put_decimal_ull(m, ' ', start_time); |
494 | seq_put_decimal_ull(m, ' ', vsize); | 494 | seq_put_decimal_ull(m, ' ', vsize); |
495 | seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); | 495 | seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); |
496 | seq_put_decimal_ull(m, ' ', rsslim); | 496 | seq_put_decimal_ull(m, ' ', rsslim); |
497 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); | 497 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); |
498 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); | 498 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); |
499 | seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); | 499 | seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); |
500 | seq_put_decimal_ull(m, ' ', esp); | 500 | seq_put_decimal_ull(m, ' ', esp); |
501 | seq_put_decimal_ull(m, ' ', eip); | 501 | seq_put_decimal_ull(m, ' ', eip); |
502 | /* The signal information here is obsolete. | 502 | /* The signal information here is obsolete. |
503 | * It must be decimal for Linux 2.0 compatibility. | 503 | * It must be decimal for Linux 2.0 compatibility. |
504 | * Use /proc/#/status for real-time signals. | 504 | * Use /proc/#/status for real-time signals. |
505 | */ | 505 | */ |
506 | seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); | 506 | seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); |
507 | seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); | 507 | seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); |
508 | seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); | 508 | seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); |
509 | seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); | 509 | seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); |
510 | seq_put_decimal_ull(m, ' ', wchan); | 510 | seq_put_decimal_ull(m, ' ', wchan); |
511 | seq_put_decimal_ull(m, ' ', 0); | 511 | seq_put_decimal_ull(m, ' ', 0); |
512 | seq_put_decimal_ull(m, ' ', 0); | 512 | seq_put_decimal_ull(m, ' ', 0); |
513 | seq_put_decimal_ll(m, ' ', task->exit_signal); | 513 | seq_put_decimal_ll(m, ' ', task->exit_signal); |
514 | seq_put_decimal_ll(m, ' ', task_cpu(task)); | 514 | seq_put_decimal_ll(m, ' ', task_cpu(task)); |
515 | seq_put_decimal_ull(m, ' ', task->rt_priority); | 515 | seq_put_decimal_ull(m, ' ', task->rt_priority); |
516 | seq_put_decimal_ull(m, ' ', task->policy); | 516 | seq_put_decimal_ull(m, ' ', task->policy); |
517 | seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); | 517 | seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); |
518 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); | 518 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); |
519 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); | 519 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); |
520 | 520 | ||
521 | if (mm && permitted) { | 521 | if (mm && permitted) { |
522 | seq_put_decimal_ull(m, ' ', mm->start_data); | 522 | seq_put_decimal_ull(m, ' ', mm->start_data); |
523 | seq_put_decimal_ull(m, ' ', mm->end_data); | 523 | seq_put_decimal_ull(m, ' ', mm->end_data); |
524 | seq_put_decimal_ull(m, ' ', mm->start_brk); | 524 | seq_put_decimal_ull(m, ' ', mm->start_brk); |
525 | seq_put_decimal_ull(m, ' ', mm->arg_start); | 525 | seq_put_decimal_ull(m, ' ', mm->arg_start); |
526 | seq_put_decimal_ull(m, ' ', mm->arg_end); | 526 | seq_put_decimal_ull(m, ' ', mm->arg_end); |
527 | seq_put_decimal_ull(m, ' ', mm->env_start); | 527 | seq_put_decimal_ull(m, ' ', mm->env_start); |
528 | seq_put_decimal_ull(m, ' ', mm->env_end); | 528 | seq_put_decimal_ull(m, ' ', mm->env_end); |
529 | } else | 529 | } else |
530 | seq_printf(m, " 0 0 0 0 0 0 0"); | 530 | seq_printf(m, " 0 0 0 0 0 0 0"); |
531 | 531 | ||
532 | if (permitted) | 532 | if (permitted) |
533 | seq_put_decimal_ll(m, ' ', task->exit_code); | 533 | seq_put_decimal_ll(m, ' ', task->exit_code); |
534 | else | 534 | else |
535 | seq_put_decimal_ll(m, ' ', 0); | 535 | seq_put_decimal_ll(m, ' ', 0); |
536 | 536 | ||
537 | seq_putc(m, '\n'); | 537 | seq_putc(m, '\n'); |
538 | if (mm) | 538 | if (mm) |
539 | mmput(mm); | 539 | mmput(mm); |
540 | return 0; | 540 | return 0; |
541 | } | 541 | } |
542 | 542 | ||
543 | int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 543 | int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, |
544 | struct pid *pid, struct task_struct *task) | 544 | struct pid *pid, struct task_struct *task) |
545 | { | 545 | { |
546 | return do_task_stat(m, ns, pid, task, 0); | 546 | return do_task_stat(m, ns, pid, task, 0); |
547 | } | 547 | } |
548 | 548 | ||
549 | int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 549 | int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, |
550 | struct pid *pid, struct task_struct *task) | 550 | struct pid *pid, struct task_struct *task) |
551 | { | 551 | { |
552 | return do_task_stat(m, ns, pid, task, 1); | 552 | return do_task_stat(m, ns, pid, task, 1); |
553 | } | 553 | } |
554 | 554 | ||
555 | int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | 555 | int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, |
556 | struct pid *pid, struct task_struct *task) | 556 | struct pid *pid, struct task_struct *task) |
557 | { | 557 | { |
558 | unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0; | 558 | unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0; |
559 | struct mm_struct *mm = get_task_mm(task); | 559 | struct mm_struct *mm = get_task_mm(task); |
560 | 560 | ||
561 | if (mm) { | 561 | if (mm) { |
562 | size = task_statm(mm, &shared, &text, &data, &resident); | 562 | size = task_statm(mm, &shared, &text, &data, &resident); |
563 | mmput(mm); | 563 | mmput(mm); |
564 | } | 564 | } |
565 | /* | 565 | /* |
566 | * For quick read, open code by putting numbers directly | 566 | * For quick read, open code by putting numbers directly |
567 | * expected format is | 567 | * expected format is |
568 | * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", | 568 | * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", |
569 | * size, resident, shared, text, data); | 569 | * size, resident, shared, text, data); |
570 | */ | 570 | */ |
571 | seq_put_decimal_ull(m, 0, size); | 571 | seq_put_decimal_ull(m, 0, size); |
572 | seq_put_decimal_ull(m, ' ', resident); | 572 | seq_put_decimal_ull(m, ' ', resident); |
573 | seq_put_decimal_ull(m, ' ', shared); | 573 | seq_put_decimal_ull(m, ' ', shared); |
574 | seq_put_decimal_ull(m, ' ', text); | 574 | seq_put_decimal_ull(m, ' ', text); |
575 | seq_put_decimal_ull(m, ' ', 0); | 575 | seq_put_decimal_ull(m, ' ', 0); |
576 | seq_put_decimal_ull(m, ' ', data); | 576 | seq_put_decimal_ull(m, ' ', data); |
577 | seq_put_decimal_ull(m, ' ', 0); | 577 | seq_put_decimal_ull(m, ' ', 0); |
578 | seq_putc(m, '\n'); | 578 | seq_putc(m, '\n'); |
579 | 579 | ||
580 | return 0; | 580 | return 0; |
581 | } | 581 | } |
582 | 582 | ||
583 | #ifdef CONFIG_CHECKPOINT_RESTORE | 583 | #ifdef CONFIG_CHECKPOINT_RESTORE |
584 | static struct pid * | 584 | static struct pid * |
585 | get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) | 585 | get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) |
586 | { | 586 | { |
587 | struct task_struct *start, *task; | 587 | struct task_struct *start, *task; |
588 | struct pid *pid = NULL; | 588 | struct pid *pid = NULL; |
589 | 589 | ||
590 | read_lock(&tasklist_lock); | 590 | read_lock(&tasklist_lock); |
591 | 591 | ||
592 | start = pid_task(proc_pid(inode), PIDTYPE_PID); | 592 | start = pid_task(proc_pid(inode), PIDTYPE_PID); |
593 | if (!start) | 593 | if (!start) |
594 | goto out; | 594 | goto out; |
595 | 595 | ||
596 | /* | 596 | /* |
597 | * Lets try to continue searching first, this gives | 597 | * Lets try to continue searching first, this gives |
598 | * us significant speedup on children-rich processes. | 598 | * us significant speedup on children-rich processes. |
599 | */ | 599 | */ |
600 | if (pid_prev) { | 600 | if (pid_prev) { |
601 | task = pid_task(pid_prev, PIDTYPE_PID); | 601 | task = pid_task(pid_prev, PIDTYPE_PID); |
602 | if (task && task->real_parent == start && | 602 | if (task && task->real_parent == start && |
603 | !(list_empty(&task->sibling))) { | 603 | !(list_empty(&task->sibling))) { |
604 | if (list_is_last(&task->sibling, &start->children)) | 604 | if (list_is_last(&task->sibling, &start->children)) |
605 | goto out; | 605 | goto out; |
606 | task = list_first_entry(&task->sibling, | 606 | task = list_first_entry(&task->sibling, |
607 | struct task_struct, sibling); | 607 | struct task_struct, sibling); |
608 | pid = get_pid(task_pid(task)); | 608 | pid = get_pid(task_pid(task)); |
609 | goto out; | 609 | goto out; |
610 | } | 610 | } |
611 | } | 611 | } |
612 | 612 | ||
613 | /* | 613 | /* |
614 | * Slow search case. | 614 | * Slow search case. |
615 | * | 615 | * |
616 | * We might miss some children here if children | 616 | * We might miss some children here if children |
617 | * are exited while we were not holding the lock, | 617 | * are exited while we were not holding the lock, |
618 | * but it was never promised to be accurate that | 618 | * but it was never promised to be accurate that |
619 | * much. | 619 | * much. |
620 | * | 620 | * |
621 | * "Just suppose that the parent sleeps, but N children | 621 | * "Just suppose that the parent sleeps, but N children |
622 | * exit after we printed their tids. Now the slow paths | 622 | * exit after we printed their tids. Now the slow paths |
623 | * skips N extra children, we miss N tasks." (c) | 623 | * skips N extra children, we miss N tasks." (c) |
624 | * | 624 | * |
625 | * So one need to stop or freeze the leader and all | 625 | * So one need to stop or freeze the leader and all |
626 | * its children to get a precise result. | 626 | * its children to get a precise result. |
627 | */ | 627 | */ |
628 | list_for_each_entry(task, &start->children, sibling) { | 628 | list_for_each_entry(task, &start->children, sibling) { |
629 | if (pos-- == 0) { | 629 | if (pos-- == 0) { |
630 | pid = get_pid(task_pid(task)); | 630 | pid = get_pid(task_pid(task)); |
631 | break; | 631 | break; |
632 | } | 632 | } |
633 | } | 633 | } |
634 | 634 | ||
635 | out: | 635 | out: |
636 | read_unlock(&tasklist_lock); | 636 | read_unlock(&tasklist_lock); |
637 | return pid; | 637 | return pid; |
638 | } | 638 | } |
639 | 639 | ||
640 | static int children_seq_show(struct seq_file *seq, void *v) | 640 | static int children_seq_show(struct seq_file *seq, void *v) |
641 | { | 641 | { |
642 | struct inode *inode = seq->private; | 642 | struct inode *inode = seq->private; |
643 | pid_t pid; | 643 | pid_t pid; |
644 | 644 | ||
645 | pid = pid_nr_ns(v, inode->i_sb->s_fs_info); | 645 | pid = pid_nr_ns(v, inode->i_sb->s_fs_info); |
646 | return seq_printf(seq, "%d ", pid); | 646 | return seq_printf(seq, "%d ", pid); |
647 | } | 647 | } |
648 | 648 | ||
649 | static void *children_seq_start(struct seq_file *seq, loff_t *pos) | 649 | static void *children_seq_start(struct seq_file *seq, loff_t *pos) |
650 | { | 650 | { |
651 | return get_children_pid(seq->private, NULL, *pos); | 651 | return get_children_pid(seq->private, NULL, *pos); |
652 | } | 652 | } |
653 | 653 | ||
654 | static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 654 | static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
655 | { | 655 | { |
656 | struct pid *pid; | 656 | struct pid *pid; |
657 | 657 | ||
658 | pid = get_children_pid(seq->private, v, *pos + 1); | 658 | pid = get_children_pid(seq->private, v, *pos + 1); |
659 | put_pid(v); | 659 | put_pid(v); |
660 | 660 | ||
661 | ++*pos; | 661 | ++*pos; |
662 | return pid; | 662 | return pid; |
663 | } | 663 | } |
664 | 664 | ||
665 | static void children_seq_stop(struct seq_file *seq, void *v) | 665 | static void children_seq_stop(struct seq_file *seq, void *v) |
666 | { | 666 | { |
667 | put_pid(v); | 667 | put_pid(v); |
668 | } | 668 | } |
669 | 669 | ||
670 | static const struct seq_operations children_seq_ops = { | 670 | static const struct seq_operations children_seq_ops = { |
671 | .start = children_seq_start, | 671 | .start = children_seq_start, |
672 | .next = children_seq_next, | 672 | .next = children_seq_next, |
673 | .stop = children_seq_stop, | 673 | .stop = children_seq_stop, |
674 | .show = children_seq_show, | 674 | .show = children_seq_show, |
675 | }; | 675 | }; |
676 | 676 | ||
677 | static int children_seq_open(struct inode *inode, struct file *file) | 677 | static int children_seq_open(struct inode *inode, struct file *file) |
678 | { | 678 | { |
679 | struct seq_file *m; | 679 | struct seq_file *m; |
680 | int ret; | 680 | int ret; |
681 | 681 | ||
682 | ret = seq_open(file, &children_seq_ops); | 682 | ret = seq_open(file, &children_seq_ops); |
683 | if (ret) | 683 | if (ret) |
684 | return ret; | 684 | return ret; |
685 | 685 | ||
686 | m = file->private_data; | 686 | m = file->private_data; |
687 | m->private = inode; | 687 | m->private = inode; |
688 | 688 | ||
689 | return ret; | 689 | return ret; |
690 | } | 690 | } |
691 | 691 | ||
692 | int children_seq_release(struct inode *inode, struct file *file) | 692 | int children_seq_release(struct inode *inode, struct file *file) |
693 | { | 693 | { |
694 | seq_release(inode, file); | 694 | seq_release(inode, file); |
695 | return 0; | 695 | return 0; |
696 | } | 696 | } |
697 | 697 | ||
698 | const struct file_operations proc_tid_children_operations = { | 698 | const struct file_operations proc_tid_children_operations = { |
699 | .open = children_seq_open, | 699 | .open = children_seq_open, |
700 | .read = seq_read, | 700 | .read = seq_read, |
701 | .llseek = seq_lseek, | 701 | .llseek = seq_lseek, |
702 | .release = children_seq_release, | 702 | .release = children_seq_release, |
703 | }; | 703 | }; |
704 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 704 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
705 | 705 |
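In the fs/proc/array.c hunk above, the two renamed calls follow the existing split in do_task_stat(): the whole-thread-group path now calls thread_group_cputime_adjusted() and the single-task path calls task_cputime_adjusted(). A minimal stand-alone sketch of that control flow (stubbed types and made-up return values, not kernel code):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cputime_t;              /* stand-in for the kernel type    */
struct task { int nr_threads; };         /* stand-in for struct task_struct */

/* Stubs standing in for the renamed kernel APIs; values are made up. */
static void task_cputime_adjusted(struct task *t, cputime_t *ut, cputime_t *st)
{
	(void)t;
	*ut = 10;
	*st = 5;
}

static void thread_group_cputime_adjusted(struct task *t, cputime_t *ut,
                                          cputime_t *st)
{
	(void)t;
	*ut = 100;
	*st = 50;
}

/* Mirrors the `whole` switch in do_task_stat(): group totals vs. one thread. */
static void report_times(struct task *t, int whole)
{
	cputime_t utime, stime;

	if (whole)
		thread_group_cputime_adjusted(t, &utime, &stime);
	else
		task_cputime_adjusted(t, &utime, &stime);

	printf("utime=%llu stime=%llu\n",
	       (unsigned long long)utime, (unsigned long long)stime);
}

int main(void)
{
	struct task t = { .nr_threads = 4 };

	report_times(&t, 1);   /* like /proc/<pid>/stat (whole group)       */
	report_times(&t, 0);   /* like /proc/<pid>/task/<tid>/stat (thread) */
	return 0;
}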
include/linux/sched.h
1 | #ifndef _LINUX_SCHED_H | 1 | #ifndef _LINUX_SCHED_H |
2 | #define _LINUX_SCHED_H | 2 | #define _LINUX_SCHED_H |
3 | 3 | ||
4 | #include <uapi/linux/sched.h> | 4 | #include <uapi/linux/sched.h> |
5 | 5 | ||
6 | 6 | ||
7 | struct sched_param { | 7 | struct sched_param { |
8 | int sched_priority; | 8 | int sched_priority; |
9 | }; | 9 | }; |
10 | 10 | ||
11 | #include <asm/param.h> /* for HZ */ | 11 | #include <asm/param.h> /* for HZ */ |
12 | 12 | ||
13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
14 | #include <linux/threads.h> | 14 | #include <linux/threads.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/types.h> | 16 | #include <linux/types.h> |
17 | #include <linux/timex.h> | 17 | #include <linux/timex.h> |
18 | #include <linux/jiffies.h> | 18 | #include <linux/jiffies.h> |
19 | #include <linux/rbtree.h> | 19 | #include <linux/rbtree.h> |
20 | #include <linux/thread_info.h> | 20 | #include <linux/thread_info.h> |
21 | #include <linux/cpumask.h> | 21 | #include <linux/cpumask.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/nodemask.h> | 23 | #include <linux/nodemask.h> |
24 | #include <linux/mm_types.h> | 24 | #include <linux/mm_types.h> |
25 | 25 | ||
26 | #include <asm/page.h> | 26 | #include <asm/page.h> |
27 | #include <asm/ptrace.h> | 27 | #include <asm/ptrace.h> |
28 | #include <asm/cputime.h> | 28 | #include <asm/cputime.h> |
29 | 29 | ||
30 | #include <linux/smp.h> | 30 | #include <linux/smp.h> |
31 | #include <linux/sem.h> | 31 | #include <linux/sem.h> |
32 | #include <linux/signal.h> | 32 | #include <linux/signal.h> |
33 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
34 | #include <linux/completion.h> | 34 | #include <linux/completion.h> |
35 | #include <linux/pid.h> | 35 | #include <linux/pid.h> |
36 | #include <linux/percpu.h> | 36 | #include <linux/percpu.h> |
37 | #include <linux/topology.h> | 37 | #include <linux/topology.h> |
38 | #include <linux/proportions.h> | 38 | #include <linux/proportions.h> |
39 | #include <linux/seccomp.h> | 39 | #include <linux/seccomp.h> |
40 | #include <linux/rcupdate.h> | 40 | #include <linux/rcupdate.h> |
41 | #include <linux/rculist.h> | 41 | #include <linux/rculist.h> |
42 | #include <linux/rtmutex.h> | 42 | #include <linux/rtmutex.h> |
43 | 43 | ||
44 | #include <linux/time.h> | 44 | #include <linux/time.h> |
45 | #include <linux/param.h> | 45 | #include <linux/param.h> |
46 | #include <linux/resource.h> | 46 | #include <linux/resource.h> |
47 | #include <linux/timer.h> | 47 | #include <linux/timer.h> |
48 | #include <linux/hrtimer.h> | 48 | #include <linux/hrtimer.h> |
49 | #include <linux/task_io_accounting.h> | 49 | #include <linux/task_io_accounting.h> |
50 | #include <linux/latencytop.h> | 50 | #include <linux/latencytop.h> |
51 | #include <linux/cred.h> | 51 | #include <linux/cred.h> |
52 | #include <linux/llist.h> | 52 | #include <linux/llist.h> |
53 | #include <linux/uidgid.h> | 53 | #include <linux/uidgid.h> |
54 | 54 | ||
55 | #include <asm/processor.h> | 55 | #include <asm/processor.h> |
56 | 56 | ||
57 | struct exec_domain; | 57 | struct exec_domain; |
58 | struct futex_pi_state; | 58 | struct futex_pi_state; |
59 | struct robust_list_head; | 59 | struct robust_list_head; |
60 | struct bio_list; | 60 | struct bio_list; |
61 | struct fs_struct; | 61 | struct fs_struct; |
62 | struct perf_event_context; | 62 | struct perf_event_context; |
63 | struct blk_plug; | 63 | struct blk_plug; |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * List of flags we want to share for kernel threads, | 66 | * List of flags we want to share for kernel threads, |
67 | * if only because they are not used by them anyway. | 67 | * if only because they are not used by them anyway. |
68 | */ | 68 | */ |
69 | #define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) | 69 | #define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * These are the constant used to fake the fixed-point load-average | 72 | * These are the constant used to fake the fixed-point load-average |
73 | * counting. Some notes: | 73 | * counting. Some notes: |
74 | * - 11 bit fractions expand to 22 bits by the multiplies: this gives | 74 | * - 11 bit fractions expand to 22 bits by the multiplies: this gives |
75 | * a load-average precision of 10 bits integer + 11 bits fractional | 75 | * a load-average precision of 10 bits integer + 11 bits fractional |
76 | * - if you want to count load-averages more often, you need more | 76 | * - if you want to count load-averages more often, you need more |
77 | * precision, or rounding will get you. With 2-second counting freq, | 77 | * precision, or rounding will get you. With 2-second counting freq, |
78 | * the EXP_n values would be 1981, 2034 and 2043 if still using only | 78 | * the EXP_n values would be 1981, 2034 and 2043 if still using only |
79 | * 11 bit fractions. | 79 | * 11 bit fractions. |
80 | */ | 80 | */ |
81 | extern unsigned long avenrun[]; /* Load averages */ | 81 | extern unsigned long avenrun[]; /* Load averages */ |
82 | extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); | 82 | extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); |
83 | 83 | ||
84 | #define FSHIFT 11 /* nr of bits of precision */ | 84 | #define FSHIFT 11 /* nr of bits of precision */ |
85 | #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ | 85 | #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ |
86 | #define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ | 86 | #define LOAD_FREQ (5*HZ+1) /* 5 sec intervals */ |
87 | #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ | 87 | #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ |
88 | #define EXP_5 2014 /* 1/exp(5sec/5min) */ | 88 | #define EXP_5 2014 /* 1/exp(5sec/5min) */ |
89 | #define EXP_15 2037 /* 1/exp(5sec/15min) */ | 89 | #define EXP_15 2037 /* 1/exp(5sec/15min) */ |
90 | 90 | ||
91 | #define CALC_LOAD(load,exp,n) \ | 91 | #define CALC_LOAD(load,exp,n) \ |
92 | load *= exp; \ | 92 | load *= exp; \ |
93 | load += n*(FIXED_1-exp); \ | 93 | load += n*(FIXED_1-exp); \ |
94 | load >>= FSHIFT; | 94 | load >>= FSHIFT; |
95 | 95 | ||
96 | extern unsigned long total_forks; | 96 | extern unsigned long total_forks; |
97 | extern int nr_threads; | 97 | extern int nr_threads; |
98 | DECLARE_PER_CPU(unsigned long, process_counts); | 98 | DECLARE_PER_CPU(unsigned long, process_counts); |
99 | extern int nr_processes(void); | 99 | extern int nr_processes(void); |
100 | extern unsigned long nr_running(void); | 100 | extern unsigned long nr_running(void); |
101 | extern unsigned long nr_uninterruptible(void); | 101 | extern unsigned long nr_uninterruptible(void); |
102 | extern unsigned long nr_iowait(void); | 102 | extern unsigned long nr_iowait(void); |
103 | extern unsigned long nr_iowait_cpu(int cpu); | 103 | extern unsigned long nr_iowait_cpu(int cpu); |
104 | extern unsigned long this_cpu_load(void); | 104 | extern unsigned long this_cpu_load(void); |
105 | 105 | ||
106 | 106 | ||
107 | extern void calc_global_load(unsigned long ticks); | 107 | extern void calc_global_load(unsigned long ticks); |
108 | extern void update_cpu_load_nohz(void); | 108 | extern void update_cpu_load_nohz(void); |
109 | 109 | ||
110 | extern unsigned long get_parent_ip(unsigned long addr); | 110 | extern unsigned long get_parent_ip(unsigned long addr); |
111 | 111 | ||
112 | struct seq_file; | 112 | struct seq_file; |
113 | struct cfs_rq; | 113 | struct cfs_rq; |
114 | struct task_group; | 114 | struct task_group; |
115 | #ifdef CONFIG_SCHED_DEBUG | 115 | #ifdef CONFIG_SCHED_DEBUG |
116 | extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); | 116 | extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); |
117 | extern void proc_sched_set_task(struct task_struct *p); | 117 | extern void proc_sched_set_task(struct task_struct *p); |
118 | extern void | 118 | extern void |
119 | print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); | 119 | print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); |
120 | #else | 120 | #else |
121 | static inline void | 121 | static inline void |
122 | proc_sched_show_task(struct task_struct *p, struct seq_file *m) | 122 | proc_sched_show_task(struct task_struct *p, struct seq_file *m) |
123 | { | 123 | { |
124 | } | 124 | } |
125 | static inline void proc_sched_set_task(struct task_struct *p) | 125 | static inline void proc_sched_set_task(struct task_struct *p) |
126 | { | 126 | { |
127 | } | 127 | } |
128 | static inline void | 128 | static inline void |
129 | print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | 129 | print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) |
130 | { | 130 | { |
131 | } | 131 | } |
132 | #endif | 132 | #endif |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * Task state bitmask. NOTE! These bits are also | 135 | * Task state bitmask. NOTE! These bits are also |
136 | * encoded in fs/proc/array.c: get_task_state(). | 136 | * encoded in fs/proc/array.c: get_task_state(). |
137 | * | 137 | * |
138 | * We have two separate sets of flags: task->state | 138 | * We have two separate sets of flags: task->state |
139 | * is about runnability, while task->exit_state are | 139 | * is about runnability, while task->exit_state are |
140 | * about the task exiting. Confusing, but this way | 140 | * about the task exiting. Confusing, but this way |
141 | * modifying one set can't modify the other one by | 141 | * modifying one set can't modify the other one by |
142 | * mistake. | 142 | * mistake. |
143 | */ | 143 | */ |
144 | #define TASK_RUNNING 0 | 144 | #define TASK_RUNNING 0 |
145 | #define TASK_INTERRUPTIBLE 1 | 145 | #define TASK_INTERRUPTIBLE 1 |
146 | #define TASK_UNINTERRUPTIBLE 2 | 146 | #define TASK_UNINTERRUPTIBLE 2 |
147 | #define __TASK_STOPPED 4 | 147 | #define __TASK_STOPPED 4 |
148 | #define __TASK_TRACED 8 | 148 | #define __TASK_TRACED 8 |
149 | /* in tsk->exit_state */ | 149 | /* in tsk->exit_state */ |
150 | #define EXIT_ZOMBIE 16 | 150 | #define EXIT_ZOMBIE 16 |
151 | #define EXIT_DEAD 32 | 151 | #define EXIT_DEAD 32 |
152 | /* in tsk->state again */ | 152 | /* in tsk->state again */ |
153 | #define TASK_DEAD 64 | 153 | #define TASK_DEAD 64 |
154 | #define TASK_WAKEKILL 128 | 154 | #define TASK_WAKEKILL 128 |
155 | #define TASK_WAKING 256 | 155 | #define TASK_WAKING 256 |
156 | #define TASK_STATE_MAX 512 | 156 | #define TASK_STATE_MAX 512 |
157 | 157 | ||
158 | #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" | 158 | #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" |
159 | 159 | ||
160 | extern char ___assert_task_state[1 - 2*!!( | 160 | extern char ___assert_task_state[1 - 2*!!( |
161 | sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; | 161 | sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; |
162 | 162 | ||
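The assertion above uses the negative-array-size trick: when the condition inside !!() is true, the array size evaluates to 1 - 2 = -1 and the build fails. An illustrative sketch of the same idiom (hypothetical name, not part of this header):

	/* A true condition yields char dummy[-1], which does not compile. */
	#define ASSERT_AT_BUILD(cond)	extern char build_assert_dummy[1 - 2 * !!(cond)]

Checking the arithmetic here: TASK_STATE_TO_CHAR_STR has sizeof("RSDTtZXxKW") - 1 = 10 characters, and ilog2(TASK_STATE_MAX) + 1 = ilog2(512) + 1 = 10, one character per reportable task/exit state, so the two sides match and the array keeps a legal size of 1.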
163 | /* Convenience macros for the sake of set_task_state */ | 163 | /* Convenience macros for the sake of set_task_state */ |
164 | #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) | 164 | #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) |
165 | #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) | 165 | #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) |
166 | #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) | 166 | #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) |
167 | 167 | ||
168 | /* Convenience macros for the sake of wake_up */ | 168 | /* Convenience macros for the sake of wake_up */ |
169 | #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) | 169 | #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) |
170 | #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) | 170 | #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) |
171 | 171 | ||
172 | /* get_task_state() */ | 172 | /* get_task_state() */ |
173 | #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ | 173 | #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ |
174 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ | 174 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ |
175 | __TASK_TRACED) | 175 | __TASK_TRACED) |
176 | 176 | ||
177 | #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) | 177 | #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) |
178 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | 178 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) |
179 | #define task_is_dead(task) ((task)->exit_state != 0) | 179 | #define task_is_dead(task) ((task)->exit_state != 0) |
180 | #define task_is_stopped_or_traced(task) \ | 180 | #define task_is_stopped_or_traced(task) \ |
181 | ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | 181 | ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) |
182 | #define task_contributes_to_load(task) \ | 182 | #define task_contributes_to_load(task) \ |
183 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | 183 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ |
184 | (task->flags & PF_FROZEN) == 0) | 184 | (task->flags & PF_FROZEN) == 0) |
185 | 185 | ||
186 | #define __set_task_state(tsk, state_value) \ | 186 | #define __set_task_state(tsk, state_value) \ |
187 | do { (tsk)->state = (state_value); } while (0) | 187 | do { (tsk)->state = (state_value); } while (0) |
188 | #define set_task_state(tsk, state_value) \ | 188 | #define set_task_state(tsk, state_value) \ |
189 | set_mb((tsk)->state, (state_value)) | 189 | set_mb((tsk)->state, (state_value)) |
190 | 190 | ||
191 | /* | 191 | /* |
192 | * set_current_state() includes a barrier so that the write of current->state | 192 | * set_current_state() includes a barrier so that the write of current->state |
193 | * is correctly serialised wrt the caller's subsequent test of whether to | 193 | * is correctly serialised wrt the caller's subsequent test of whether to |
194 | * actually sleep: | 194 | * actually sleep: |
195 | * | 195 | * |
196 | * set_current_state(TASK_UNINTERRUPTIBLE); | 196 | * set_current_state(TASK_UNINTERRUPTIBLE); |
197 | * if (do_i_need_to_sleep()) | 197 | * if (do_i_need_to_sleep()) |
198 | * schedule(); | 198 | * schedule(); |
199 | * | 199 | * |
200 | * If the caller does not need such serialisation then use __set_current_state() | 200 | * If the caller does not need such serialisation then use __set_current_state() |
201 | */ | 201 | */ |
202 | #define __set_current_state(state_value) \ | 202 | #define __set_current_state(state_value) \ |
203 | do { current->state = (state_value); } while (0) | 203 | do { current->state = (state_value); } while (0) |
204 | #define set_current_state(state_value) \ | 204 | #define set_current_state(state_value) \ |
205 | set_mb(current->state, (state_value)) | 205 | set_mb(current->state, (state_value)) |
206 | 206 | ||
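A minimal sketch of the sleep pattern the comment above describes (wait_condition is a hypothetical stand-in for the caller's actual check):

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);	/* publish state, with barrier */
		if (wait_condition)				/* re-check after the barrier */
			break;
		schedule();					/* really go to sleep */
	}
	__set_current_state(TASK_RUNNING);			/* no serialisation needed here */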
207 | /* Task command name length */ | 207 | /* Task command name length */ |
208 | #define TASK_COMM_LEN 16 | 208 | #define TASK_COMM_LEN 16 |
209 | 209 | ||
210 | #include <linux/spinlock.h> | 210 | #include <linux/spinlock.h> |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * This serializes "schedule()" and also protects | 213 | * This serializes "schedule()" and also protects |
214 | * the run-queue from deletions/modifications (but | 214 | * the run-queue from deletions/modifications (but |
215 | * _adding_ to the beginning of the run-queue has | 215 | * _adding_ to the beginning of the run-queue has |
216 | * a separate lock). | 216 | * a separate lock). |
217 | */ | 217 | */ |
218 | extern rwlock_t tasklist_lock; | 218 | extern rwlock_t tasklist_lock; |
219 | extern spinlock_t mmlist_lock; | 219 | extern spinlock_t mmlist_lock; |
220 | 220 | ||
221 | struct task_struct; | 221 | struct task_struct; |
222 | 222 | ||
223 | #ifdef CONFIG_PROVE_RCU | 223 | #ifdef CONFIG_PROVE_RCU |
224 | extern int lockdep_tasklist_lock_is_held(void); | 224 | extern int lockdep_tasklist_lock_is_held(void); |
225 | #endif /* #ifdef CONFIG_PROVE_RCU */ | 225 | #endif /* #ifdef CONFIG_PROVE_RCU */ |
226 | 226 | ||
227 | extern void sched_init(void); | 227 | extern void sched_init(void); |
228 | extern void sched_init_smp(void); | 228 | extern void sched_init_smp(void); |
229 | extern asmlinkage void schedule_tail(struct task_struct *prev); | 229 | extern asmlinkage void schedule_tail(struct task_struct *prev); |
230 | extern void init_idle(struct task_struct *idle, int cpu); | 230 | extern void init_idle(struct task_struct *idle, int cpu); |
231 | extern void init_idle_bootup_task(struct task_struct *idle); | 231 | extern void init_idle_bootup_task(struct task_struct *idle); |
232 | 232 | ||
233 | extern int runqueue_is_locked(int cpu); | 233 | extern int runqueue_is_locked(int cpu); |
234 | 234 | ||
235 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) | 235 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) |
236 | extern void nohz_balance_enter_idle(int cpu); | 236 | extern void nohz_balance_enter_idle(int cpu); |
237 | extern void set_cpu_sd_state_idle(void); | 237 | extern void set_cpu_sd_state_idle(void); |
238 | extern int get_nohz_timer_target(void); | 238 | extern int get_nohz_timer_target(void); |
239 | #else | 239 | #else |
240 | static inline void nohz_balance_enter_idle(int cpu) { } | 240 | static inline void nohz_balance_enter_idle(int cpu) { } |
241 | static inline void set_cpu_sd_state_idle(void) { } | 241 | static inline void set_cpu_sd_state_idle(void) { } |
242 | #endif | 242 | #endif |
243 | 243 | ||
244 | /* | 244 | /* |
245 | * Only dump TASK_* tasks. (0 for all tasks) | 245 | * Only dump TASK_* tasks. (0 for all tasks) |
246 | */ | 246 | */ |
247 | extern void show_state_filter(unsigned long state_filter); | 247 | extern void show_state_filter(unsigned long state_filter); |
248 | 248 | ||
249 | static inline void show_state(void) | 249 | static inline void show_state(void) |
250 | { | 250 | { |
251 | show_state_filter(0); | 251 | show_state_filter(0); |
252 | } | 252 | } |
253 | 253 | ||
254 | extern void show_regs(struct pt_regs *); | 254 | extern void show_regs(struct pt_regs *); |
255 | 255 | ||
256 | /* | 256 | /* |
257 | * TASK is a pointer to the task whose backtrace we want to see (or NULL for current | 257 | * TASK is a pointer to the task whose backtrace we want to see (or NULL for current |
258 | * task), SP is the stack pointer of the first frame that should be shown in the back | 258 | * task), SP is the stack pointer of the first frame that should be shown in the back |
259 | * trace (or NULL if the entire call-chain of the task should be shown). | 259 | * trace (or NULL if the entire call-chain of the task should be shown). |
260 | */ | 260 | */ |
261 | extern void show_stack(struct task_struct *task, unsigned long *sp); | 261 | extern void show_stack(struct task_struct *task, unsigned long *sp); |
262 | 262 | ||
263 | void io_schedule(void); | 263 | void io_schedule(void); |
264 | long io_schedule_timeout(long timeout); | 264 | long io_schedule_timeout(long timeout); |
265 | 265 | ||
266 | extern void cpu_init (void); | 266 | extern void cpu_init (void); |
267 | extern void trap_init(void); | 267 | extern void trap_init(void); |
268 | extern void update_process_times(int user); | 268 | extern void update_process_times(int user); |
269 | extern void scheduler_tick(void); | 269 | extern void scheduler_tick(void); |
270 | 270 | ||
271 | extern void sched_show_task(struct task_struct *p); | 271 | extern void sched_show_task(struct task_struct *p); |
272 | 272 | ||
273 | #ifdef CONFIG_LOCKUP_DETECTOR | 273 | #ifdef CONFIG_LOCKUP_DETECTOR |
274 | extern void touch_softlockup_watchdog(void); | 274 | extern void touch_softlockup_watchdog(void); |
275 | extern void touch_softlockup_watchdog_sync(void); | 275 | extern void touch_softlockup_watchdog_sync(void); |
276 | extern void touch_all_softlockup_watchdogs(void); | 276 | extern void touch_all_softlockup_watchdogs(void); |
277 | extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, | 277 | extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, |
278 | void __user *buffer, | 278 | void __user *buffer, |
279 | size_t *lenp, loff_t *ppos); | 279 | size_t *lenp, loff_t *ppos); |
280 | extern unsigned int softlockup_panic; | 280 | extern unsigned int softlockup_panic; |
281 | void lockup_detector_init(void); | 281 | void lockup_detector_init(void); |
282 | #else | 282 | #else |
283 | static inline void touch_softlockup_watchdog(void) | 283 | static inline void touch_softlockup_watchdog(void) |
284 | { | 284 | { |
285 | } | 285 | } |
286 | static inline void touch_softlockup_watchdog_sync(void) | 286 | static inline void touch_softlockup_watchdog_sync(void) |
287 | { | 287 | { |
288 | } | 288 | } |
289 | static inline void touch_all_softlockup_watchdogs(void) | 289 | static inline void touch_all_softlockup_watchdogs(void) |
290 | { | 290 | { |
291 | } | 291 | } |
292 | static inline void lockup_detector_init(void) | 292 | static inline void lockup_detector_init(void) |
293 | { | 293 | { |
294 | } | 294 | } |
295 | #endif | 295 | #endif |
296 | 296 | ||
297 | #ifdef CONFIG_DETECT_HUNG_TASK | 297 | #ifdef CONFIG_DETECT_HUNG_TASK |
298 | extern unsigned int sysctl_hung_task_panic; | 298 | extern unsigned int sysctl_hung_task_panic; |
299 | extern unsigned long sysctl_hung_task_check_count; | 299 | extern unsigned long sysctl_hung_task_check_count; |
300 | extern unsigned long sysctl_hung_task_timeout_secs; | 300 | extern unsigned long sysctl_hung_task_timeout_secs; |
301 | extern unsigned long sysctl_hung_task_warnings; | 301 | extern unsigned long sysctl_hung_task_warnings; |
302 | extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, | 302 | extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, |
303 | void __user *buffer, | 303 | void __user *buffer, |
304 | size_t *lenp, loff_t *ppos); | 304 | size_t *lenp, loff_t *ppos); |
305 | #else | 305 | #else |
306 | /* Avoid need for ifdefs elsewhere in the code */ | 306 | /* Avoid need for ifdefs elsewhere in the code */ |
307 | enum { sysctl_hung_task_timeout_secs = 0 }; | 307 | enum { sysctl_hung_task_timeout_secs = 0 }; |
308 | #endif | 308 | #endif |
309 | 309 | ||
310 | /* Attach to any functions which should be ignored in wchan output. */ | 310 | /* Attach to any functions which should be ignored in wchan output. */ |
311 | #define __sched __attribute__((__section__(".sched.text"))) | 311 | #define __sched __attribute__((__section__(".sched.text"))) |
312 | 312 | ||
313 | /* Linker adds these: start and end of __sched functions */ | 313 | /* Linker adds these: start and end of __sched functions */ |
314 | extern char __sched_text_start[], __sched_text_end[]; | 314 | extern char __sched_text_start[], __sched_text_end[]; |
315 | 315 | ||
316 | /* Is this address in the __sched functions? */ | 316 | /* Is this address in the __sched functions? */ |
317 | extern int in_sched_functions(unsigned long addr); | 317 | extern int in_sched_functions(unsigned long addr); |
318 | 318 | ||
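For illustration, a hypothetical helper declared as "static int __sched my_wait_helper(void)" is placed in the .sched.text section between __sched_text_start and __sched_text_end, so in_sched_functions() returns true for its addresses and it is skipped when a sleeping task's wchan is reported.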
319 | #define MAX_SCHEDULE_TIMEOUT LONG_MAX | 319 | #define MAX_SCHEDULE_TIMEOUT LONG_MAX |
320 | extern signed long schedule_timeout(signed long timeout); | 320 | extern signed long schedule_timeout(signed long timeout); |
321 | extern signed long schedule_timeout_interruptible(signed long timeout); | 321 | extern signed long schedule_timeout_interruptible(signed long timeout); |
322 | extern signed long schedule_timeout_killable(signed long timeout); | 322 | extern signed long schedule_timeout_killable(signed long timeout); |
323 | extern signed long schedule_timeout_uninterruptible(signed long timeout); | 323 | extern signed long schedule_timeout_uninterruptible(signed long timeout); |
324 | asmlinkage void schedule(void); | 324 | asmlinkage void schedule(void); |
325 | extern void schedule_preempt_disabled(void); | 325 | extern void schedule_preempt_disabled(void); |
326 | extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); | 326 | extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); |
327 | 327 | ||
328 | struct nsproxy; | 328 | struct nsproxy; |
329 | struct user_namespace; | 329 | struct user_namespace; |
330 | 330 | ||
331 | /* | 331 | /* |
332 | * Default maximum number of active map areas, this limits the number of vmas | 332 | * Default maximum number of active map areas, this limits the number of vmas |
333 | * per mm struct. Users can overwrite this number by sysctl but there is a | 333 | * per mm struct. Users can overwrite this number by sysctl but there is a |
334 | * problem. | 334 | * problem. |
335 | * | 335 | * |
336 | * When a program's coredump is generated in ELF format, a section is created | 336 | * When a program's coredump is generated in ELF format, a section is created |
337 | * per vma. In ELF, the number of sections is represented as an unsigned short, | 337 | * per vma. In ELF, the number of sections is represented as an unsigned short, |
338 | * so the number of sections must be smaller than 65535 at coredump time. | 338 | * so the number of sections must be smaller than 65535 at coredump time. |
339 | * Because the kernel adds some informative sections to the program image when | 339 | * Because the kernel adds some informative sections to the program image when |
340 | * generating the coredump, we need some margin. The number of extra sections | 340 | * generating the coredump, we need some margin. The number of extra sections |
341 | * is currently 1-3 and depends on the arch. We use "5" as a safe margin here. | 341 | * is currently 1-3 and depends on the arch. We use "5" as a safe margin here. |
342 | */ | 342 | */ |
343 | #define MAPCOUNT_ELF_CORE_MARGIN (5) | 343 | #define MAPCOUNT_ELF_CORE_MARGIN (5) |
344 | #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) | 344 | #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) |
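For reference, USHRT_MAX is 65535, so the default works out to 65530 map areas per mm; the limit can be raised or lowered at runtime through sysctl_max_map_count below, which backs the vm.max_map_count sysctl.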
345 | 345 | ||
346 | extern int sysctl_max_map_count; | 346 | extern int sysctl_max_map_count; |
347 | 347 | ||
348 | #include <linux/aio.h> | 348 | #include <linux/aio.h> |
349 | 349 | ||
350 | #ifdef CONFIG_MMU | 350 | #ifdef CONFIG_MMU |
351 | extern void arch_pick_mmap_layout(struct mm_struct *mm); | 351 | extern void arch_pick_mmap_layout(struct mm_struct *mm); |
352 | extern unsigned long | 352 | extern unsigned long |
353 | arch_get_unmapped_area(struct file *, unsigned long, unsigned long, | 353 | arch_get_unmapped_area(struct file *, unsigned long, unsigned long, |
354 | unsigned long, unsigned long); | 354 | unsigned long, unsigned long); |
355 | extern unsigned long | 355 | extern unsigned long |
356 | arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, | 356 | arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, |
357 | unsigned long len, unsigned long pgoff, | 357 | unsigned long len, unsigned long pgoff, |
358 | unsigned long flags); | 358 | unsigned long flags); |
359 | extern void arch_unmap_area(struct mm_struct *, unsigned long); | 359 | extern void arch_unmap_area(struct mm_struct *, unsigned long); |
360 | extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); | 360 | extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); |
361 | #else | 361 | #else |
362 | static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} | 362 | static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} |
363 | #endif | 363 | #endif |
364 | 364 | ||
365 | 365 | ||
366 | extern void set_dumpable(struct mm_struct *mm, int value); | 366 | extern void set_dumpable(struct mm_struct *mm, int value); |
367 | extern int get_dumpable(struct mm_struct *mm); | 367 | extern int get_dumpable(struct mm_struct *mm); |
368 | 368 | ||
369 | /* get/set_dumpable() values */ | 369 | /* get/set_dumpable() values */ |
370 | #define SUID_DUMPABLE_DISABLED 0 | 370 | #define SUID_DUMPABLE_DISABLED 0 |
371 | #define SUID_DUMPABLE_ENABLED 1 | 371 | #define SUID_DUMPABLE_ENABLED 1 |
372 | #define SUID_DUMPABLE_SAFE 2 | 372 | #define SUID_DUMPABLE_SAFE 2 |
373 | 373 | ||
374 | /* mm flags */ | 374 | /* mm flags */ |
375 | /* dumpable bits */ | 375 | /* dumpable bits */ |
376 | #define MMF_DUMPABLE 0 /* core dump is permitted */ | 376 | #define MMF_DUMPABLE 0 /* core dump is permitted */ |
377 | #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ | 377 | #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ |
378 | 378 | ||
379 | #define MMF_DUMPABLE_BITS 2 | 379 | #define MMF_DUMPABLE_BITS 2 |
380 | #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) | 380 | #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) |
381 | 381 | ||
382 | /* coredump filter bits */ | 382 | /* coredump filter bits */ |
383 | #define MMF_DUMP_ANON_PRIVATE 2 | 383 | #define MMF_DUMP_ANON_PRIVATE 2 |
384 | #define MMF_DUMP_ANON_SHARED 3 | 384 | #define MMF_DUMP_ANON_SHARED 3 |
385 | #define MMF_DUMP_MAPPED_PRIVATE 4 | 385 | #define MMF_DUMP_MAPPED_PRIVATE 4 |
386 | #define MMF_DUMP_MAPPED_SHARED 5 | 386 | #define MMF_DUMP_MAPPED_SHARED 5 |
387 | #define MMF_DUMP_ELF_HEADERS 6 | 387 | #define MMF_DUMP_ELF_HEADERS 6 |
388 | #define MMF_DUMP_HUGETLB_PRIVATE 7 | 388 | #define MMF_DUMP_HUGETLB_PRIVATE 7 |
389 | #define MMF_DUMP_HUGETLB_SHARED 8 | 389 | #define MMF_DUMP_HUGETLB_SHARED 8 |
390 | 390 | ||
391 | #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS | 391 | #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS |
392 | #define MMF_DUMP_FILTER_BITS 7 | 392 | #define MMF_DUMP_FILTER_BITS 7 |
393 | #define MMF_DUMP_FILTER_MASK \ | 393 | #define MMF_DUMP_FILTER_MASK \ |
394 | (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) | 394 | (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) |
395 | #define MMF_DUMP_FILTER_DEFAULT \ | 395 | #define MMF_DUMP_FILTER_DEFAULT \ |
396 | ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ | 396 | ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ |
397 | (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) | 397 | (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) |
398 | 398 | ||
399 | #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS | 399 | #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS |
400 | # define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) | 400 | # define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) |
401 | #else | 401 | #else |
402 | # define MMF_DUMP_MASK_DEFAULT_ELF 0 | 402 | # define MMF_DUMP_MASK_DEFAULT_ELF 0 |
403 | #endif | 403 | #endif |
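Working the numbers out: with MMF_DUMPABLE_BITS == 2 and MMF_DUMP_FILTER_BITS == 7, the filter occupies bits 2-8 of mm->flags, so MMF_DUMP_FILTER_MASK is ((1 << 7) - 1) << 2 = 0x1fc, and MMF_DUMP_FILTER_DEFAULT turns on the anonymous-private, anonymous-shared and hugetlb-private bits (plus MMF_DUMP_ELF_HEADERS when CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is set).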
404 | /* leave room for more dump flags */ | 404 | /* leave room for more dump flags */ |
405 | #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ | 405 | #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ |
406 | #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ | 406 | #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ |
407 | #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ | 407 | #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ |
408 | 408 | ||
409 | #define MMF_HAS_UPROBES 19 /* has uprobes */ | 409 | #define MMF_HAS_UPROBES 19 /* has uprobes */ |
410 | #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ | 410 | #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ |
411 | 411 | ||
412 | #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) | 412 | #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) |
413 | 413 | ||
414 | struct sighand_struct { | 414 | struct sighand_struct { |
415 | atomic_t count; | 415 | atomic_t count; |
416 | struct k_sigaction action[_NSIG]; | 416 | struct k_sigaction action[_NSIG]; |
417 | spinlock_t siglock; | 417 | spinlock_t siglock; |
418 | wait_queue_head_t signalfd_wqh; | 418 | wait_queue_head_t signalfd_wqh; |
419 | }; | 419 | }; |
420 | 420 | ||
421 | struct pacct_struct { | 421 | struct pacct_struct { |
422 | int ac_flag; | 422 | int ac_flag; |
423 | long ac_exitcode; | 423 | long ac_exitcode; |
424 | unsigned long ac_mem; | 424 | unsigned long ac_mem; |
425 | cputime_t ac_utime, ac_stime; | 425 | cputime_t ac_utime, ac_stime; |
426 | unsigned long ac_minflt, ac_majflt; | 426 | unsigned long ac_minflt, ac_majflt; |
427 | }; | 427 | }; |
428 | 428 | ||
429 | struct cpu_itimer { | 429 | struct cpu_itimer { |
430 | cputime_t expires; | 430 | cputime_t expires; |
431 | cputime_t incr; | 431 | cputime_t incr; |
432 | u32 error; | 432 | u32 error; |
433 | u32 incr_error; | 433 | u32 incr_error; |
434 | }; | 434 | }; |
435 | 435 | ||
436 | /** | 436 | /** |
437 | * struct task_cputime - collected CPU time counts | 437 | * struct task_cputime - collected CPU time counts |
438 | * @utime: time spent in user mode, in &cputime_t units | 438 | * @utime: time spent in user mode, in &cputime_t units |
439 | * @stime: time spent in kernel mode, in &cputime_t units | 439 | * @stime: time spent in kernel mode, in &cputime_t units |
440 | * @sum_exec_runtime: total time spent on the CPU, in nanoseconds | 440 | * @sum_exec_runtime: total time spent on the CPU, in nanoseconds |
441 | * | 441 | * |
442 | * This structure groups together three kinds of CPU time that are | 442 | * This structure groups together three kinds of CPU time that are |
443 | * tracked for threads and thread groups. Most things considering | 443 | * tracked for threads and thread groups. Most things considering |
444 | * CPU time want to group these counts together and treat all three | 444 | * CPU time want to group these counts together and treat all three |
445 | * of them in parallel. | 445 | * of them in parallel. |
446 | */ | 446 | */ |
447 | struct task_cputime { | 447 | struct task_cputime { |
448 | cputime_t utime; | 448 | cputime_t utime; |
449 | cputime_t stime; | 449 | cputime_t stime; |
450 | unsigned long long sum_exec_runtime; | 450 | unsigned long long sum_exec_runtime; |
451 | }; | 451 | }; |
452 | /* Alternate field names when used to cache expirations. */ | 452 | /* Alternate field names when used to cache expirations. */ |
453 | #define prof_exp stime | 453 | #define prof_exp stime |
454 | #define virt_exp utime | 454 | #define virt_exp utime |
455 | #define sched_exp sum_exec_runtime | 455 | #define sched_exp sum_exec_runtime |
456 | 456 | ||
457 | #define INIT_CPUTIME \ | 457 | #define INIT_CPUTIME \ |
458 | (struct task_cputime) { \ | 458 | (struct task_cputime) { \ |
459 | .utime = 0, \ | 459 | .utime = 0, \ |
460 | .stime = 0, \ | 460 | .stime = 0, \ |
461 | .sum_exec_runtime = 0, \ | 461 | .sum_exec_runtime = 0, \ |
462 | } | 462 | } |
463 | 463 | ||
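As a usage sketch (hypothetical accumulation loop, with "t" standing in for a task_struct pointer), INIT_CPUTIME provides a zeroed accumulator for code that folds per-thread times together:

	struct task_cputime totals = INIT_CPUTIME;	/* utime = stime = 0, runtime = 0 */

	totals.utime += t->utime;
	totals.stime += t->stime;
	totals.sum_exec_runtime += t->se.sum_exec_runtime;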
464 | /* | 464 | /* |
465 | * Disable preemption until the scheduler is running. | 465 | * Disable preemption until the scheduler is running. |
466 | * Reset by start_kernel()->sched_init()->init_idle(). | 466 | * Reset by start_kernel()->sched_init()->init_idle(). |
467 | * | 467 | * |
468 | * We include PREEMPT_ACTIVE to avoid cond_resched() from working | 468 | * We include PREEMPT_ACTIVE to avoid cond_resched() from working |
469 | * before the scheduler is active -- see should_resched(). | 469 | * before the scheduler is active -- see should_resched(). |
470 | */ | 470 | */ |
471 | #define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) | 471 | #define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) |
472 | 472 | ||
473 | /** | 473 | /** |
474 | * struct thread_group_cputimer - thread group interval timer counts | 474 | * struct thread_group_cputimer - thread group interval timer counts |
475 | * @cputime: thread group interval timers. | 475 | * @cputime: thread group interval timers. |
476 | * @running: non-zero when there are timers running and | 476 | * @running: non-zero when there are timers running and |
477 | * @cputime receives updates. | 477 | * @cputime receives updates. |
478 | * @lock: lock for fields in this struct. | 478 | * @lock: lock for fields in this struct. |
479 | * | 479 | * |
480 | * This structure contains the version of task_cputime, above, that is | 480 | * This structure contains the version of task_cputime, above, that is |
481 | * used for thread group CPU timer calculations. | 481 | * used for thread group CPU timer calculations. |
482 | */ | 482 | */ |
483 | struct thread_group_cputimer { | 483 | struct thread_group_cputimer { |
484 | struct task_cputime cputime; | 484 | struct task_cputime cputime; |
485 | int running; | 485 | int running; |
486 | raw_spinlock_t lock; | 486 | raw_spinlock_t lock; |
487 | }; | 487 | }; |
488 | 488 | ||
489 | #include <linux/rwsem.h> | 489 | #include <linux/rwsem.h> |
490 | struct autogroup; | 490 | struct autogroup; |
491 | 491 | ||
492 | /* | 492 | /* |
493 | * NOTE! "signal_struct" does not have its own | 493 | * NOTE! "signal_struct" does not have its own |
494 | * locking, because a shared signal_struct always | 494 | * locking, because a shared signal_struct always |
495 | * implies a shared sighand_struct, so locking | 495 | * implies a shared sighand_struct, so locking |
496 | * sighand_struct is always a proper superset of | 496 | * sighand_struct is always a proper superset of |
497 | * the locking of signal_struct. | 497 | * the locking of signal_struct. |
498 | */ | 498 | */ |
499 | struct signal_struct { | 499 | struct signal_struct { |
500 | atomic_t sigcnt; | 500 | atomic_t sigcnt; |
501 | atomic_t live; | 501 | atomic_t live; |
502 | int nr_threads; | 502 | int nr_threads; |
503 | 503 | ||
504 | wait_queue_head_t wait_chldexit; /* for wait4() */ | 504 | wait_queue_head_t wait_chldexit; /* for wait4() */ |
505 | 505 | ||
506 | /* current thread group signal load-balancing target: */ | 506 | /* current thread group signal load-balancing target: */ |
507 | struct task_struct *curr_target; | 507 | struct task_struct *curr_target; |
508 | 508 | ||
509 | /* shared signal handling: */ | 509 | /* shared signal handling: */ |
510 | struct sigpending shared_pending; | 510 | struct sigpending shared_pending; |
511 | 511 | ||
512 | /* thread group exit support */ | 512 | /* thread group exit support */ |
513 | int group_exit_code; | 513 | int group_exit_code; |
514 | /* overloaded: | 514 | /* overloaded: |
515 | * - notify group_exit_task when ->count is equal to notify_count | 515 | * - notify group_exit_task when ->count is equal to notify_count |
516 | * - everyone except group_exit_task is stopped during signal delivery | 516 | * - everyone except group_exit_task is stopped during signal delivery |
517 | * of fatal signals, group_exit_task processes the signal. | 517 | * of fatal signals, group_exit_task processes the signal. |
518 | */ | 518 | */ |
519 | int notify_count; | 519 | int notify_count; |
520 | struct task_struct *group_exit_task; | 520 | struct task_struct *group_exit_task; |
521 | 521 | ||
522 | /* thread group stop support, overloads group_exit_code too */ | 522 | /* thread group stop support, overloads group_exit_code too */ |
523 | int group_stop_count; | 523 | int group_stop_count; |
524 | unsigned int flags; /* see SIGNAL_* flags below */ | 524 | unsigned int flags; /* see SIGNAL_* flags below */ |
525 | 525 | ||
526 | /* | 526 | /* |
527 | * PR_SET_CHILD_SUBREAPER marks a process, like a service | 527 | * PR_SET_CHILD_SUBREAPER marks a process, like a service |
528 | * manager, to re-parent orphan (double-forking) child processes | 528 | * manager, to re-parent orphan (double-forking) child processes |
529 | * to this process instead of 'init'. The service manager is | 529 | * to this process instead of 'init'. The service manager is |
530 | * able to receive SIGCHLD signals and is able to investigate | 530 | * able to receive SIGCHLD signals and is able to investigate |
531 | * the process until it calls wait(). All children of this | 531 | * the process until it calls wait(). All children of this |
532 | * process will inherit a flag if they should look for a | 532 | * process will inherit a flag if they should look for a |
533 | * child_subreaper process at exit. | 533 | * child_subreaper process at exit. |
534 | */ | 534 | */ |
535 | unsigned int is_child_subreaper:1; | 535 | unsigned int is_child_subreaper:1; |
536 | unsigned int has_child_subreaper:1; | 536 | unsigned int has_child_subreaper:1; |
537 | 537 | ||
538 | /* POSIX.1b Interval Timers */ | 538 | /* POSIX.1b Interval Timers */ |
539 | struct list_head posix_timers; | 539 | struct list_head posix_timers; |
540 | 540 | ||
541 | /* ITIMER_REAL timer for the process */ | 541 | /* ITIMER_REAL timer for the process */ |
542 | struct hrtimer real_timer; | 542 | struct hrtimer real_timer; |
543 | struct pid *leader_pid; | 543 | struct pid *leader_pid; |
544 | ktime_t it_real_incr; | 544 | ktime_t it_real_incr; |
545 | 545 | ||
546 | /* | 546 | /* |
547 | * ITIMER_PROF and ITIMER_VIRTUAL timers for the process; we use | 547 | * ITIMER_PROF and ITIMER_VIRTUAL timers for the process; we use |
548 | * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing the array, as these | 548 | * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing the array, as these |
549 | * values are defined to 0 and 1 respectively | 549 | * values are defined to 0 and 1 respectively |
550 | */ | 550 | */ |
551 | struct cpu_itimer it[2]; | 551 | struct cpu_itimer it[2]; |
552 | 552 | ||
553 | /* | 553 | /* |
554 | * Thread group totals for process CPU timers. | 554 | * Thread group totals for process CPU timers. |
555 | * See thread_group_cputimer(), et al, for details. | 555 | * See thread_group_cputimer(), et al, for details. |
556 | */ | 556 | */ |
557 | struct thread_group_cputimer cputimer; | 557 | struct thread_group_cputimer cputimer; |
558 | 558 | ||
559 | /* Earliest-expiration cache. */ | 559 | /* Earliest-expiration cache. */ |
560 | struct task_cputime cputime_expires; | 560 | struct task_cputime cputime_expires; |
561 | 561 | ||
562 | struct list_head cpu_timers[3]; | 562 | struct list_head cpu_timers[3]; |
563 | 563 | ||
564 | struct pid *tty_old_pgrp; | 564 | struct pid *tty_old_pgrp; |
565 | 565 | ||
566 | /* boolean value for session group leader */ | 566 | /* boolean value for session group leader */ |
567 | int leader; | 567 | int leader; |
568 | 568 | ||
569 | struct tty_struct *tty; /* NULL if no tty */ | 569 | struct tty_struct *tty; /* NULL if no tty */ |
570 | 570 | ||
571 | #ifdef CONFIG_SCHED_AUTOGROUP | 571 | #ifdef CONFIG_SCHED_AUTOGROUP |
572 | struct autogroup *autogroup; | 572 | struct autogroup *autogroup; |
573 | #endif | 573 | #endif |
574 | /* | 574 | /* |
575 | * Cumulative resource counters for dead threads in the group, | 575 | * Cumulative resource counters for dead threads in the group, |
576 | * and for reaped dead child processes forked by this group. | 576 | * and for reaped dead child processes forked by this group. |
577 | * Live threads maintain their own counters and add to these | 577 | * Live threads maintain their own counters and add to these |
578 | * in __exit_signal, except for the group leader. | 578 | * in __exit_signal, except for the group leader. |
579 | */ | 579 | */ |
580 | cputime_t utime, stime, cutime, cstime; | 580 | cputime_t utime, stime, cutime, cstime; |
581 | cputime_t gtime; | 581 | cputime_t gtime; |
582 | cputime_t cgtime; | 582 | cputime_t cgtime; |
583 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 583 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
584 | cputime_t prev_utime, prev_stime; | 584 | cputime_t prev_utime, prev_stime; |
585 | #endif | 585 | #endif |
586 | unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; | 586 | unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; |
587 | unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; | 587 | unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; |
588 | unsigned long inblock, oublock, cinblock, coublock; | 588 | unsigned long inblock, oublock, cinblock, coublock; |
589 | unsigned long maxrss, cmaxrss; | 589 | unsigned long maxrss, cmaxrss; |
590 | struct task_io_accounting ioac; | 590 | struct task_io_accounting ioac; |
591 | 591 | ||
592 | /* | 592 | /* |
593 | * Cumulative ns of schedule CPU time fo dead threads in the | 593 | * Cumulative ns of schedule CPU time fo dead threads in the |
594 | * group, not including a zombie group leader, (This only differs | 594 | * group, not including a zombie group leader, (This only differs |
595 | * from jiffies_to_ns(utime + stime) if sched_clock uses something | 595 | * from jiffies_to_ns(utime + stime) if sched_clock uses something |
596 | * other than jiffies.) | 596 | * other than jiffies.) |
597 | */ | 597 | */ |
598 | unsigned long long sum_sched_runtime; | 598 | unsigned long long sum_sched_runtime; |
599 | 599 | ||
600 | /* | 600 | /* |
601 | * We don't bother to synchronize most readers of this at all, | 601 | * We don't bother to synchronize most readers of this at all, |
602 | * because there is no reader checking a limit that actually needs | 602 | * because there is no reader checking a limit that actually needs |
603 | * to get both rlim_cur and rlim_max atomically, and either one | 603 | * to get both rlim_cur and rlim_max atomically, and either one |
604 | * alone is a single word that can safely be read normally. | 604 | * alone is a single word that can safely be read normally. |
605 | * getrlimit/setrlimit use task_lock(current->group_leader) to | 605 | * getrlimit/setrlimit use task_lock(current->group_leader) to |
606 | * protect this instead of the siglock, because they really | 606 | * protect this instead of the siglock, because they really |
607 | * have no need to disable irqs. | 607 | * have no need to disable irqs. |
608 | */ | 608 | */ |
609 | struct rlimit rlim[RLIM_NLIMITS]; | 609 | struct rlimit rlim[RLIM_NLIMITS]; |
610 | 610 | ||
611 | #ifdef CONFIG_BSD_PROCESS_ACCT | 611 | #ifdef CONFIG_BSD_PROCESS_ACCT |
612 | struct pacct_struct pacct; /* per-process accounting information */ | 612 | struct pacct_struct pacct; /* per-process accounting information */ |
613 | #endif | 613 | #endif |
614 | #ifdef CONFIG_TASKSTATS | 614 | #ifdef CONFIG_TASKSTATS |
615 | struct taskstats *stats; | 615 | struct taskstats *stats; |
616 | #endif | 616 | #endif |
617 | #ifdef CONFIG_AUDIT | 617 | #ifdef CONFIG_AUDIT |
618 | unsigned audit_tty; | 618 | unsigned audit_tty; |
619 | struct tty_audit_buf *tty_audit_buf; | 619 | struct tty_audit_buf *tty_audit_buf; |
620 | #endif | 620 | #endif |
621 | #ifdef CONFIG_CGROUPS | 621 | #ifdef CONFIG_CGROUPS |
622 | /* | 622 | /* |
623 | * group_rwsem prevents new tasks from entering the threadgroup and | 623 | * group_rwsem prevents new tasks from entering the threadgroup and |
624 | * member tasks from exiting, more specifically, setting of | 624 | * member tasks from exiting, more specifically, setting of |
625 | * PF_EXITING. fork and exit paths are protected with this rwsem | 625 | * PF_EXITING. fork and exit paths are protected with this rwsem |
626 | * using threadgroup_change_begin/end(). Users which require | 626 | * using threadgroup_change_begin/end(). Users which require |
627 | * threadgroup to remain stable should use threadgroup_[un]lock() | 627 | * threadgroup to remain stable should use threadgroup_[un]lock() |
628 | * which also takes care of exec path. Currently, cgroup is the | 628 | * which also takes care of exec path. Currently, cgroup is the |
629 | * only user. | 629 | * only user. |
630 | */ | 630 | */ |
631 | struct rw_semaphore group_rwsem; | 631 | struct rw_semaphore group_rwsem; |
632 | #endif | 632 | #endif |
633 | 633 | ||
634 | int oom_score_adj; /* OOM kill score adjustment */ | 634 | int oom_score_adj; /* OOM kill score adjustment */ |
635 | int oom_score_adj_min; /* OOM kill score adjustment minimum value. | 635 | int oom_score_adj_min; /* OOM kill score adjustment minimum value. |
636 | * Only settable by CAP_SYS_RESOURCE. */ | 636 | * Only settable by CAP_SYS_RESOURCE. */ |
637 | 637 | ||
638 | struct mutex cred_guard_mutex; /* guard against foreign influences on | 638 | struct mutex cred_guard_mutex; /* guard against foreign influences on |
639 | * credential calculations | 639 | * credential calculations |
640 | * (notably ptrace) */ | 640 | * (notably ptrace) */ |
641 | }; | 641 | }; |
642 | 642 | ||
643 | /* | 643 | /* |
644 | * Bits in flags field of signal_struct. | 644 | * Bits in flags field of signal_struct. |
645 | */ | 645 | */ |
646 | #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ | 646 | #define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ |
647 | #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ | 647 | #define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ |
648 | #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ | 648 | #define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ |
649 | /* | 649 | /* |
650 | * Pending notifications to parent. | 650 | * Pending notifications to parent. |
651 | */ | 651 | */ |
652 | #define SIGNAL_CLD_STOPPED 0x00000010 | 652 | #define SIGNAL_CLD_STOPPED 0x00000010 |
653 | #define SIGNAL_CLD_CONTINUED 0x00000020 | 653 | #define SIGNAL_CLD_CONTINUED 0x00000020 |
654 | #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) | 654 | #define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) |
655 | 655 | ||
656 | #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ | 656 | #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ |
657 | 657 | ||
658 | /* If true, all threads except ->group_exit_task have pending SIGKILL */ | 658 | /* If true, all threads except ->group_exit_task have pending SIGKILL */ |
659 | static inline int signal_group_exit(const struct signal_struct *sig) | 659 | static inline int signal_group_exit(const struct signal_struct *sig) |
660 | { | 660 | { |
661 | return (sig->flags & SIGNAL_GROUP_EXIT) || | 661 | return (sig->flags & SIGNAL_GROUP_EXIT) || |
662 | (sig->group_exit_task != NULL); | 662 | (sig->group_exit_task != NULL); |
663 | } | 663 | } |
664 | 664 | ||
665 | /* | 665 | /* |
666 | * Some day this will be a full-fledged user tracking system.. | 666 | * Some day this will be a full-fledged user tracking system.. |
667 | */ | 667 | */ |
668 | struct user_struct { | 668 | struct user_struct { |
669 | atomic_t __count; /* reference count */ | 669 | atomic_t __count; /* reference count */ |
670 | atomic_t processes; /* How many processes does this user have? */ | 670 | atomic_t processes; /* How many processes does this user have? */ |
671 | atomic_t files; /* How many open files does this user have? */ | 671 | atomic_t files; /* How many open files does this user have? */ |
672 | atomic_t sigpending; /* How many pending signals does this user have? */ | 672 | atomic_t sigpending; /* How many pending signals does this user have? */ |
673 | #ifdef CONFIG_INOTIFY_USER | 673 | #ifdef CONFIG_INOTIFY_USER |
674 | atomic_t inotify_watches; /* How many inotify watches does this user have? */ | 674 | atomic_t inotify_watches; /* How many inotify watches does this user have? */ |
675 | atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ | 675 | atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ |
676 | #endif | 676 | #endif |
677 | #ifdef CONFIG_FANOTIFY | 677 | #ifdef CONFIG_FANOTIFY |
678 | atomic_t fanotify_listeners; | 678 | atomic_t fanotify_listeners; |
679 | #endif | 679 | #endif |
680 | #ifdef CONFIG_EPOLL | 680 | #ifdef CONFIG_EPOLL |
681 | atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ | 681 | atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ |
682 | #endif | 682 | #endif |
683 | #ifdef CONFIG_POSIX_MQUEUE | 683 | #ifdef CONFIG_POSIX_MQUEUE |
684 | /* protected by mq_lock */ | 684 | /* protected by mq_lock */ |
685 | unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ | 685 | unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ |
686 | #endif | 686 | #endif |
687 | unsigned long locked_shm; /* How many pages of mlocked shm ? */ | 687 | unsigned long locked_shm; /* How many pages of mlocked shm ? */ |
688 | 688 | ||
689 | #ifdef CONFIG_KEYS | 689 | #ifdef CONFIG_KEYS |
690 | struct key *uid_keyring; /* UID specific keyring */ | 690 | struct key *uid_keyring; /* UID specific keyring */ |
691 | struct key *session_keyring; /* UID's default session keyring */ | 691 | struct key *session_keyring; /* UID's default session keyring */ |
692 | #endif | 692 | #endif |
693 | 693 | ||
694 | /* Hash table maintenance information */ | 694 | /* Hash table maintenance information */ |
695 | struct hlist_node uidhash_node; | 695 | struct hlist_node uidhash_node; |
696 | kuid_t uid; | 696 | kuid_t uid; |
697 | 697 | ||
698 | #ifdef CONFIG_PERF_EVENTS | 698 | #ifdef CONFIG_PERF_EVENTS |
699 | atomic_long_t locked_vm; | 699 | atomic_long_t locked_vm; |
700 | #endif | 700 | #endif |
701 | }; | 701 | }; |
702 | 702 | ||
703 | extern int uids_sysfs_init(void); | 703 | extern int uids_sysfs_init(void); |
704 | 704 | ||
705 | extern struct user_struct *find_user(kuid_t); | 705 | extern struct user_struct *find_user(kuid_t); |
706 | 706 | ||
707 | extern struct user_struct root_user; | 707 | extern struct user_struct root_user; |
708 | #define INIT_USER (&root_user) | 708 | #define INIT_USER (&root_user) |
709 | 709 | ||
710 | 710 | ||
711 | struct backing_dev_info; | 711 | struct backing_dev_info; |
712 | struct reclaim_state; | 712 | struct reclaim_state; |
713 | 713 | ||
714 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 714 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
715 | struct sched_info { | 715 | struct sched_info { |
716 | /* cumulative counters */ | 716 | /* cumulative counters */ |
717 | unsigned long pcount; /* # of times run on this cpu */ | 717 | unsigned long pcount; /* # of times run on this cpu */ |
718 | unsigned long long run_delay; /* time spent waiting on a runqueue */ | 718 | unsigned long long run_delay; /* time spent waiting on a runqueue */ |
719 | 719 | ||
720 | /* timestamps */ | 720 | /* timestamps */ |
721 | unsigned long long last_arrival,/* when we last ran on a cpu */ | 721 | unsigned long long last_arrival,/* when we last ran on a cpu */ |
722 | last_queued; /* when we were last queued to run */ | 722 | last_queued; /* when we were last queued to run */ |
723 | }; | 723 | }; |
724 | #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ | 724 | #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ |
725 | 725 | ||
726 | #ifdef CONFIG_TASK_DELAY_ACCT | 726 | #ifdef CONFIG_TASK_DELAY_ACCT |
727 | struct task_delay_info { | 727 | struct task_delay_info { |
728 | spinlock_t lock; | 728 | spinlock_t lock; |
729 | unsigned int flags; /* Private per-task flags */ | 729 | unsigned int flags; /* Private per-task flags */ |
730 | 730 | ||
731 | /* For each stat XXX, add following, aligned appropriately | 731 | /* For each stat XXX, add following, aligned appropriately |
732 | * | 732 | * |
733 | * struct timespec XXX_start, XXX_end; | 733 | * struct timespec XXX_start, XXX_end; |
734 | * u64 XXX_delay; | 734 | * u64 XXX_delay; |
735 | * u32 XXX_count; | 735 | * u32 XXX_count; |
736 | * | 736 | * |
737 | * Atomicity of updates to XXX_delay, XXX_count protected by | 737 | * Atomicity of updates to XXX_delay, XXX_count protected by |
738 | * single lock above (split into XXX_lock if contention is an issue). | 738 | * single lock above (split into XXX_lock if contention is an issue). |
739 | */ | 739 | */ |
740 | 740 | ||
741 | /* | 741 | /* |
742 | * XXX_count is incremented on every XXX operation, the delay | 742 | * XXX_count is incremented on every XXX operation, the delay |
743 | * associated with the operation is added to XXX_delay. | 743 | * associated with the operation is added to XXX_delay. |
744 | * XXX_delay contains the accumulated delay time in nanoseconds. | 744 | * XXX_delay contains the accumulated delay time in nanoseconds. |
745 | */ | 745 | */ |
746 | struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ | 746 | struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ |
747 | u64 blkio_delay; /* wait for sync block io completion */ | 747 | u64 blkio_delay; /* wait for sync block io completion */ |
748 | u64 swapin_delay; /* wait for swapin block io completion */ | 748 | u64 swapin_delay; /* wait for swapin block io completion */ |
749 | u32 blkio_count; /* total count of the number of sync block */ | 749 | u32 blkio_count; /* total count of the number of sync block */ |
750 | /* io operations performed */ | 750 | /* io operations performed */ |
751 | u32 swapin_count; /* total count of the number of swapin block */ | 751 | u32 swapin_count; /* total count of the number of swapin block */ |
752 | /* io operations performed */ | 752 | /* io operations performed */ |
753 | 753 | ||
754 | struct timespec freepages_start, freepages_end; | 754 | struct timespec freepages_start, freepages_end; |
755 | u64 freepages_delay; /* wait for memory reclaim */ | 755 | u64 freepages_delay; /* wait for memory reclaim */ |
756 | u32 freepages_count; /* total count of memory reclaim */ | 756 | u32 freepages_count; /* total count of memory reclaim */ |
757 | }; | 757 | }; |
758 | #endif /* CONFIG_TASK_DELAY_ACCT */ | 758 | #endif /* CONFIG_TASK_DELAY_ACCT */ |
759 | 759 | ||
760 | static inline int sched_info_on(void) | 760 | static inline int sched_info_on(void) |
761 | { | 761 | { |
762 | #ifdef CONFIG_SCHEDSTATS | 762 | #ifdef CONFIG_SCHEDSTATS |
763 | return 1; | 763 | return 1; |
764 | #elif defined(CONFIG_TASK_DELAY_ACCT) | 764 | #elif defined(CONFIG_TASK_DELAY_ACCT) |
765 | extern int delayacct_on; | 765 | extern int delayacct_on; |
766 | return delayacct_on; | 766 | return delayacct_on; |
767 | #else | 767 | #else |
768 | return 0; | 768 | return 0; |
769 | #endif | 769 | #endif |
770 | } | 770 | } |
771 | 771 | ||
772 | enum cpu_idle_type { | 772 | enum cpu_idle_type { |
773 | CPU_IDLE, | 773 | CPU_IDLE, |
774 | CPU_NOT_IDLE, | 774 | CPU_NOT_IDLE, |
775 | CPU_NEWLY_IDLE, | 775 | CPU_NEWLY_IDLE, |
776 | CPU_MAX_IDLE_TYPES | 776 | CPU_MAX_IDLE_TYPES |
777 | }; | 777 | }; |
778 | 778 | ||
779 | /* | 779 | /* |
780 | * Increase resolution of nice-level calculations for 64-bit architectures. | 780 | * Increase resolution of nice-level calculations for 64-bit architectures. |
781 | * The extra resolution improves shares distribution and load balancing of | 781 | * The extra resolution improves shares distribution and load balancing of |
782 | * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup | 782 | * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup |
783 | * hierarchies, especially on larger systems. This is not a user-visible change | 783 | * hierarchies, especially on larger systems. This is not a user-visible change |
784 | * and does not change the user-interface for setting shares/weights. | 784 | * and does not change the user-interface for setting shares/weights. |
785 | * | 785 | * |
786 | * We increase resolution only if we have enough bits to allow this increased | 786 | * We increase resolution only if we have enough bits to allow this increased |
787 | * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution | 787 | * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution |
788 | * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the | 788 | * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the |
789 | * increased costs. | 789 | * increased costs. |
790 | */ | 790 | */ |
791 | #if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ | 791 | #if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ |
792 | # define SCHED_LOAD_RESOLUTION 10 | 792 | # define SCHED_LOAD_RESOLUTION 10 |
793 | # define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) | 793 | # define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) |
794 | # define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) | 794 | # define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION) |
795 | #else | 795 | #else |
796 | # define SCHED_LOAD_RESOLUTION 0 | 796 | # define SCHED_LOAD_RESOLUTION 0 |
797 | # define scale_load(w) (w) | 797 | # define scale_load(w) (w) |
798 | # define scale_load_down(w) (w) | 798 | # define scale_load_down(w) (w) |
799 | #endif | 799 | #endif |
800 | 800 | ||
801 | #define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) | 801 | #define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION) |
802 | #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) | 802 | #define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT) |
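As configured here (the higher-resolution branch is compiled out by the "#if 0" above), SCHED_LOAD_RESOLUTION is 0, so scale_load() and scale_load_down() are identity operations and SCHED_LOAD_SCALE is 1 << 10 = 1024; with the 10-bit resolution enabled, SCHED_LOAD_SHIFT would become 20 and scale_load(1024) would be 1024 << 10 = 1048576.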
803 | 803 | ||
804 | /* | 804 | /* |
805 | * Increase resolution of cpu_power calculations | 805 | * Increase resolution of cpu_power calculations |
806 | */ | 806 | */ |
807 | #define SCHED_POWER_SHIFT 10 | 807 | #define SCHED_POWER_SHIFT 10 |
808 | #define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) | 808 | #define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) |
809 | 809 | ||
810 | /* | 810 | /* |
811 | * sched-domains (multiprocessor balancing) declarations: | 811 | * sched-domains (multiprocessor balancing) declarations: |
812 | */ | 812 | */ |
813 | #ifdef CONFIG_SMP | 813 | #ifdef CONFIG_SMP |
814 | #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ | 814 | #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ |
815 | #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ | 815 | #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ |
816 | #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ | 816 | #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ |
817 | #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ | 817 | #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ |
818 | #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ | 818 | #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ |
819 | #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ | 819 | #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ |
820 | #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ | 820 | #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ |
821 | #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ | 821 | #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ |
822 | #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ | 822 | #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ |
823 | #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ | 823 | #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ |
824 | #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ | 824 | #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ |
825 | #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ | 825 | #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ |
826 | 826 | ||
827 | extern int __weak arch_sd_sibiling_asym_packing(void); | 827 | extern int __weak arch_sd_sibiling_asym_packing(void); |
828 | 828 | ||
829 | struct sched_group_power { | 829 | struct sched_group_power { |
830 | atomic_t ref; | 830 | atomic_t ref; |
831 | /* | 831 | /* |
832 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a | 832 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a |
833 | * single CPU. | 833 | * single CPU. |
834 | */ | 834 | */ |
835 | unsigned int power, power_orig; | 835 | unsigned int power, power_orig; |
836 | unsigned long next_update; | 836 | unsigned long next_update; |
837 | /* | 837 | /* |
838 | * Number of busy cpus in this group. | 838 | * Number of busy cpus in this group. |
839 | */ | 839 | */ |
840 | atomic_t nr_busy_cpus; | 840 | atomic_t nr_busy_cpus; |
841 | 841 | ||
842 | unsigned long cpumask[0]; /* iteration mask */ | 842 | unsigned long cpumask[0]; /* iteration mask */ |
843 | }; | 843 | }; |
844 | 844 | ||
845 | struct sched_group { | 845 | struct sched_group { |
846 | struct sched_group *next; /* Must be a circular list */ | 846 | struct sched_group *next; /* Must be a circular list */ |
847 | atomic_t ref; | 847 | atomic_t ref; |
848 | 848 | ||
849 | unsigned int group_weight; | 849 | unsigned int group_weight; |
850 | struct sched_group_power *sgp; | 850 | struct sched_group_power *sgp; |
851 | 851 | ||
852 | /* | 852 | /* |
853 | * The CPUs this group covers. | 853 | * The CPUs this group covers. |
854 | * | 854 | * |
855 | * NOTE: this field is variable length. (Allocated dynamically | 855 | * NOTE: this field is variable length. (Allocated dynamically |
856 | * by attaching extra space to the end of the structure, | 856 | * by attaching extra space to the end of the structure, |
857 | * depending on how many CPUs the kernel has booted up with) | 857 | * depending on how many CPUs the kernel has booted up with) |
858 | */ | 858 | */ |
859 | unsigned long cpumask[0]; | 859 | unsigned long cpumask[0]; |
860 | }; | 860 | }; |
861 | 861 | ||
862 | static inline struct cpumask *sched_group_cpus(struct sched_group *sg) | 862 | static inline struct cpumask *sched_group_cpus(struct sched_group *sg) |
863 | { | 863 | { |
864 | return to_cpumask(sg->cpumask); | 864 | return to_cpumask(sg->cpumask); |
865 | } | 865 | } |
866 | 866 | ||
867 | /* | 867 | /* |
868 | * cpumask masking which cpus in the group are allowed to iterate up the domain | 868 | * cpumask masking which cpus in the group are allowed to iterate up the domain |
869 | * tree. | 869 | * tree. |
870 | */ | 870 | */ |
871 | static inline struct cpumask *sched_group_mask(struct sched_group *sg) | 871 | static inline struct cpumask *sched_group_mask(struct sched_group *sg) |
872 | { | 872 | { |
873 | return to_cpumask(sg->sgp->cpumask); | 873 | return to_cpumask(sg->sgp->cpumask); |
874 | } | 874 | } |
875 | 875 | ||
876 | /** | 876 | /** |
877 | * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. | 877 | * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. |
878 | * @group: The group whose first cpu is to be returned. | 878 | * @group: The group whose first cpu is to be returned. |
879 | */ | 879 | */ |
880 | static inline unsigned int group_first_cpu(struct sched_group *group) | 880 | static inline unsigned int group_first_cpu(struct sched_group *group) |
881 | { | 881 | { |
882 | return cpumask_first(sched_group_cpus(group)); | 882 | return cpumask_first(sched_group_cpus(group)); |
883 | } | 883 | } |
884 | 884 | ||
885 | struct sched_domain_attr { | 885 | struct sched_domain_attr { |
886 | int relax_domain_level; | 886 | int relax_domain_level; |
887 | }; | 887 | }; |
888 | 888 | ||
889 | #define SD_ATTR_INIT (struct sched_domain_attr) { \ | 889 | #define SD_ATTR_INIT (struct sched_domain_attr) { \ |
890 | .relax_domain_level = -1, \ | 890 | .relax_domain_level = -1, \ |
891 | } | 891 | } |
892 | 892 | ||
893 | extern int sched_domain_level_max; | 893 | extern int sched_domain_level_max; |
894 | 894 | ||
895 | struct sched_domain { | 895 | struct sched_domain { |
896 | /* These fields must be setup */ | 896 | /* These fields must be setup */ |
897 | struct sched_domain *parent; /* top domain must be null terminated */ | 897 | struct sched_domain *parent; /* top domain must be null terminated */ |
898 | struct sched_domain *child; /* bottom domain must be null terminated */ | 898 | struct sched_domain *child; /* bottom domain must be null terminated */ |
899 | struct sched_group *groups; /* the balancing groups of the domain */ | 899 | struct sched_group *groups; /* the balancing groups of the domain */ |
900 | unsigned long min_interval; /* Minimum balance interval ms */ | 900 | unsigned long min_interval; /* Minimum balance interval ms */ |
901 | unsigned long max_interval; /* Maximum balance interval ms */ | 901 | unsigned long max_interval; /* Maximum balance interval ms */ |
902 | unsigned int busy_factor; /* less balancing by factor if busy */ | 902 | unsigned int busy_factor; /* less balancing by factor if busy */ |
903 | unsigned int imbalance_pct; /* No balance until over watermark */ | 903 | unsigned int imbalance_pct; /* No balance until over watermark */ |
904 | unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ | 904 | unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ |
905 | unsigned int busy_idx; | 905 | unsigned int busy_idx; |
906 | unsigned int idle_idx; | 906 | unsigned int idle_idx; |
907 | unsigned int newidle_idx; | 907 | unsigned int newidle_idx; |
908 | unsigned int wake_idx; | 908 | unsigned int wake_idx; |
909 | unsigned int forkexec_idx; | 909 | unsigned int forkexec_idx; |
910 | unsigned int smt_gain; | 910 | unsigned int smt_gain; |
911 | int flags; /* See SD_* */ | 911 | int flags; /* See SD_* */ |
912 | int level; | 912 | int level; |
913 | 913 | ||
914 | /* Runtime fields. */ | 914 | /* Runtime fields. */ |
915 | unsigned long last_balance; /* init to jiffies. units in jiffies */ | 915 | unsigned long last_balance; /* init to jiffies. units in jiffies */ |
916 | unsigned int balance_interval; /* initialise to 1. units in ms. */ | 916 | unsigned int balance_interval; /* initialise to 1. units in ms. */ |
917 | unsigned int nr_balance_failed; /* initialise to 0 */ | 917 | unsigned int nr_balance_failed; /* initialise to 0 */ |
918 | 918 | ||
919 | u64 last_update; | 919 | u64 last_update; |
920 | 920 | ||
921 | #ifdef CONFIG_SCHEDSTATS | 921 | #ifdef CONFIG_SCHEDSTATS |
922 | /* load_balance() stats */ | 922 | /* load_balance() stats */ |
923 | unsigned int lb_count[CPU_MAX_IDLE_TYPES]; | 923 | unsigned int lb_count[CPU_MAX_IDLE_TYPES]; |
924 | unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; | 924 | unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; |
925 | unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; | 925 | unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; |
926 | unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; | 926 | unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; |
927 | unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; | 927 | unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; |
928 | unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; | 928 | unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; |
929 | unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; | 929 | unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; |
930 | unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; | 930 | unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; |
931 | 931 | ||
932 | /* Active load balancing */ | 932 | /* Active load balancing */ |
933 | unsigned int alb_count; | 933 | unsigned int alb_count; |
934 | unsigned int alb_failed; | 934 | unsigned int alb_failed; |
935 | unsigned int alb_pushed; | 935 | unsigned int alb_pushed; |
936 | 936 | ||
937 | /* SD_BALANCE_EXEC stats */ | 937 | /* SD_BALANCE_EXEC stats */ |
938 | unsigned int sbe_count; | 938 | unsigned int sbe_count; |
939 | unsigned int sbe_balanced; | 939 | unsigned int sbe_balanced; |
940 | unsigned int sbe_pushed; | 940 | unsigned int sbe_pushed; |
941 | 941 | ||
942 | /* SD_BALANCE_FORK stats */ | 942 | /* SD_BALANCE_FORK stats */ |
943 | unsigned int sbf_count; | 943 | unsigned int sbf_count; |
944 | unsigned int sbf_balanced; | 944 | unsigned int sbf_balanced; |
945 | unsigned int sbf_pushed; | 945 | unsigned int sbf_pushed; |
946 | 946 | ||
947 | /* try_to_wake_up() stats */ | 947 | /* try_to_wake_up() stats */ |
948 | unsigned int ttwu_wake_remote; | 948 | unsigned int ttwu_wake_remote; |
949 | unsigned int ttwu_move_affine; | 949 | unsigned int ttwu_move_affine; |
950 | unsigned int ttwu_move_balance; | 950 | unsigned int ttwu_move_balance; |
951 | #endif | 951 | #endif |
952 | #ifdef CONFIG_SCHED_DEBUG | 952 | #ifdef CONFIG_SCHED_DEBUG |
953 | char *name; | 953 | char *name; |
954 | #endif | 954 | #endif |
955 | union { | 955 | union { |
956 | void *private; /* used during construction */ | 956 | void *private; /* used during construction */ |
957 | struct rcu_head rcu; /* used during destruction */ | 957 | struct rcu_head rcu; /* used during destruction */ |
958 | }; | 958 | }; |
959 | 959 | ||
960 | unsigned int span_weight; | 960 | unsigned int span_weight; |
961 | /* | 961 | /* |
962 | * Span of all CPUs in this domain. | 962 | * Span of all CPUs in this domain. |
963 | * | 963 | * |
964 | * NOTE: this field is variable length. (Allocated dynamically | 964 | * NOTE: this field is variable length. (Allocated dynamically |
965 | * by attaching extra space to the end of the structure, | 965 | * by attaching extra space to the end of the structure, |
966 | * depending on how many CPUs the kernel has booted up with) | 966 | * depending on how many CPUs the kernel has booted up with) |
967 | */ | 967 | */ |
968 | unsigned long span[0]; | 968 | unsigned long span[0]; |
969 | }; | 969 | }; |
970 | 970 | ||
971 | static inline struct cpumask *sched_domain_span(struct sched_domain *sd) | 971 | static inline struct cpumask *sched_domain_span(struct sched_domain *sd) |
972 | { | 972 | { |
973 | return to_cpumask(sd->span); | 973 | return to_cpumask(sd->span); |
974 | } | 974 | } |
975 | 975 | ||
976 | extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], | 976 | extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
977 | struct sched_domain_attr *dattr_new); | 977 | struct sched_domain_attr *dattr_new); |
978 | 978 | ||
979 | /* Allocate an array of sched domains, for partition_sched_domains(). */ | 979 | /* Allocate an array of sched domains, for partition_sched_domains(). */ |
980 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms); | 980 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms); |
981 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); | 981 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); |
982 | 982 | ||
983 | /* Test a flag in parent sched domain */ | 983 | /* Test a flag in parent sched domain */ |
984 | static inline int test_sd_parent(struct sched_domain *sd, int flag) | 984 | static inline int test_sd_parent(struct sched_domain *sd, int flag) |
985 | { | 985 | { |
986 | if (sd->parent && (sd->parent->flags & flag)) | 986 | if (sd->parent && (sd->parent->flags & flag)) |
987 | return 1; | 987 | return 1; |
988 | 988 | ||
989 | return 0; | 989 | return 0; |
990 | } | 990 | } |
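A hypothetical caller might use it as follows (illustrative only; SD_BALANCE_FORK is one of the SD_* flags referenced above):

    /* Does the next level up want fork-time balancing? */
    if (test_sd_parent(sd, SD_BALANCE_FORK)) {
        /* ... consider CPUs in sd->parent's span ... */
    }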
991 | 991 | ||
992 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); | 992 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); |
993 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); | 993 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); |
994 | 994 | ||
995 | bool cpus_share_cache(int this_cpu, int that_cpu); | 995 | bool cpus_share_cache(int this_cpu, int that_cpu); |
996 | 996 | ||
997 | #else /* CONFIG_SMP */ | 997 | #else /* CONFIG_SMP */ |
998 | 998 | ||
999 | struct sched_domain_attr; | 999 | struct sched_domain_attr; |
1000 | 1000 | ||
1001 | static inline void | 1001 | static inline void |
1002 | partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], | 1002 | partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
1003 | struct sched_domain_attr *dattr_new) | 1003 | struct sched_domain_attr *dattr_new) |
1004 | { | 1004 | { |
1005 | } | 1005 | } |
1006 | 1006 | ||
1007 | static inline bool cpus_share_cache(int this_cpu, int that_cpu) | 1007 | static inline bool cpus_share_cache(int this_cpu, int that_cpu) |
1008 | { | 1008 | { |
1009 | return true; | 1009 | return true; |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | #endif /* !CONFIG_SMP */ | 1012 | #endif /* !CONFIG_SMP */ |
1013 | 1013 | ||
1014 | 1014 | ||
1015 | struct io_context; /* See blkdev.h */ | 1015 | struct io_context; /* See blkdev.h */ |
1016 | 1016 | ||
1017 | 1017 | ||
1018 | #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK | 1018 | #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK |
1019 | extern void prefetch_stack(struct task_struct *t); | 1019 | extern void prefetch_stack(struct task_struct *t); |
1020 | #else | 1020 | #else |
1021 | static inline void prefetch_stack(struct task_struct *t) { } | 1021 | static inline void prefetch_stack(struct task_struct *t) { } |
1022 | #endif | 1022 | #endif |
1023 | 1023 | ||
1024 | struct audit_context; /* See audit.c */ | 1024 | struct audit_context; /* See audit.c */ |
1025 | struct mempolicy; | 1025 | struct mempolicy; |
1026 | struct pipe_inode_info; | 1026 | struct pipe_inode_info; |
1027 | struct uts_namespace; | 1027 | struct uts_namespace; |
1028 | 1028 | ||
1029 | struct rq; | 1029 | struct rq; |
1030 | struct sched_domain; | 1030 | struct sched_domain; |
1031 | 1031 | ||
1032 | /* | 1032 | /* |
1033 | * wake flags | 1033 | * wake flags |
1034 | */ | 1034 | */ |
1035 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ | 1035 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ |
1036 | #define WF_FORK 0x02 /* child wakeup after fork */ | 1036 | #define WF_FORK 0x02 /* child wakeup after fork */ |
1037 | #define WF_MIGRATED 0x04 /* internal use, task got migrated */ | 1037 | #define WF_MIGRATED 0x04 /* internal use, task got migrated */ |
1038 | 1038 | ||
1039 | #define ENQUEUE_WAKEUP 1 | 1039 | #define ENQUEUE_WAKEUP 1 |
1040 | #define ENQUEUE_HEAD 2 | 1040 | #define ENQUEUE_HEAD 2 |
1041 | #ifdef CONFIG_SMP | 1041 | #ifdef CONFIG_SMP |
1042 | #define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ | 1042 | #define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */ |
1043 | #else | 1043 | #else |
1044 | #define ENQUEUE_WAKING 0 | 1044 | #define ENQUEUE_WAKING 0 |
1045 | #endif | 1045 | #endif |
1046 | 1046 | ||
1047 | #define DEQUEUE_SLEEP 1 | 1047 | #define DEQUEUE_SLEEP 1 |
1048 | 1048 | ||
1049 | struct sched_class { | 1049 | struct sched_class { |
1050 | const struct sched_class *next; | 1050 | const struct sched_class *next; |
1051 | 1051 | ||
1052 | void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); | 1052 | void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); |
1053 | void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); | 1053 | void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); |
1054 | void (*yield_task) (struct rq *rq); | 1054 | void (*yield_task) (struct rq *rq); |
1055 | bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); | 1055 | bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); |
1056 | 1056 | ||
1057 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); | 1057 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); |
1058 | 1058 | ||
1059 | struct task_struct * (*pick_next_task) (struct rq *rq); | 1059 | struct task_struct * (*pick_next_task) (struct rq *rq); |
1060 | void (*put_prev_task) (struct rq *rq, struct task_struct *p); | 1060 | void (*put_prev_task) (struct rq *rq, struct task_struct *p); |
1061 | 1061 | ||
1062 | #ifdef CONFIG_SMP | 1062 | #ifdef CONFIG_SMP |
1063 | int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); | 1063 | int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); |
1064 | void (*migrate_task_rq)(struct task_struct *p, int next_cpu); | 1064 | void (*migrate_task_rq)(struct task_struct *p, int next_cpu); |
1065 | 1065 | ||
1066 | void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); | 1066 | void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); |
1067 | void (*post_schedule) (struct rq *this_rq); | 1067 | void (*post_schedule) (struct rq *this_rq); |
1068 | void (*task_waking) (struct task_struct *task); | 1068 | void (*task_waking) (struct task_struct *task); |
1069 | void (*task_woken) (struct rq *this_rq, struct task_struct *task); | 1069 | void (*task_woken) (struct rq *this_rq, struct task_struct *task); |
1070 | 1070 | ||
1071 | void (*set_cpus_allowed)(struct task_struct *p, | 1071 | void (*set_cpus_allowed)(struct task_struct *p, |
1072 | const struct cpumask *newmask); | 1072 | const struct cpumask *newmask); |
1073 | 1073 | ||
1074 | void (*rq_online)(struct rq *rq); | 1074 | void (*rq_online)(struct rq *rq); |
1075 | void (*rq_offline)(struct rq *rq); | 1075 | void (*rq_offline)(struct rq *rq); |
1076 | #endif | 1076 | #endif |
1077 | 1077 | ||
1078 | void (*set_curr_task) (struct rq *rq); | 1078 | void (*set_curr_task) (struct rq *rq); |
1079 | void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); | 1079 | void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); |
1080 | void (*task_fork) (struct task_struct *p); | 1080 | void (*task_fork) (struct task_struct *p); |
1081 | 1081 | ||
1082 | void (*switched_from) (struct rq *this_rq, struct task_struct *task); | 1082 | void (*switched_from) (struct rq *this_rq, struct task_struct *task); |
1083 | void (*switched_to) (struct rq *this_rq, struct task_struct *task); | 1083 | void (*switched_to) (struct rq *this_rq, struct task_struct *task); |
1084 | void (*prio_changed) (struct rq *this_rq, struct task_struct *task, | 1084 | void (*prio_changed) (struct rq *this_rq, struct task_struct *task, |
1085 | int oldprio); | 1085 | int oldprio); |
1086 | 1086 | ||
1087 | unsigned int (*get_rr_interval) (struct rq *rq, | 1087 | unsigned int (*get_rr_interval) (struct rq *rq, |
1088 | struct task_struct *task); | 1088 | struct task_struct *task); |
1089 | 1089 | ||
1090 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1090 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1091 | void (*task_move_group) (struct task_struct *p, int on_rq); | 1091 | void (*task_move_group) (struct task_struct *p, int on_rq); |
1092 | #endif | 1092 | #endif |
1093 | }; | 1093 | }; |
1094 | 1094 | ||
1095 | struct load_weight { | 1095 | struct load_weight { |
1096 | unsigned long weight, inv_weight; | 1096 | unsigned long weight, inv_weight; |
1097 | }; | 1097 | }; |
1098 | 1098 | ||
1099 | struct sched_avg { | 1099 | struct sched_avg { |
1100 | /* | 1100 | /* |
1101 | * These sums represent an infinite geometric series and so are bound | 1101 | * These sums represent an infinite geometric series and so are bound |
1102 | * above by 1024/(1-y). Thus we only need a u32 to store them for all | 1102 | * above by 1024/(1-y). Thus we only need a u32 to store them for all |
1103 | * choices of y < 1-2^(-32)*1024. | 1103 | * choices of y < 1-2^(-32)*1024. |
1104 | */ | 1104 | */ |
1105 | u32 runnable_avg_sum, runnable_avg_period; | 1105 | u32 runnable_avg_sum, runnable_avg_period; |
1106 | u64 last_runnable_update; | 1106 | u64 last_runnable_update; |
1107 | s64 decay_count; | 1107 | s64 decay_count; |
1108 | unsigned long load_avg_contrib; | 1108 | unsigned long load_avg_contrib; |
1109 | }; | 1109 | }; |
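To make the bound in the comment concrete: each fully runnable period contributes at most 1024, and older periods are decayed by a factor y per period. Assuming the decay used by the fair-class load-tracking code (y chosen so that y^32 = 1/2), the series works out to:

    sum <= 1024 * (1 + y + y^2 + ...) = 1024 / (1 - y)
    y = 0.5^(1/32) ~= 0.97857   =>   1024 / (1 - y) ~= 47800

which is far below the u32 limit of ~4.29e9, as the comment states.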
1110 | 1110 | ||
1111 | #ifdef CONFIG_SCHEDSTATS | 1111 | #ifdef CONFIG_SCHEDSTATS |
1112 | struct sched_statistics { | 1112 | struct sched_statistics { |
1113 | u64 wait_start; | 1113 | u64 wait_start; |
1114 | u64 wait_max; | 1114 | u64 wait_max; |
1115 | u64 wait_count; | 1115 | u64 wait_count; |
1116 | u64 wait_sum; | 1116 | u64 wait_sum; |
1117 | u64 iowait_count; | 1117 | u64 iowait_count; |
1118 | u64 iowait_sum; | 1118 | u64 iowait_sum; |
1119 | 1119 | ||
1120 | u64 sleep_start; | 1120 | u64 sleep_start; |
1121 | u64 sleep_max; | 1121 | u64 sleep_max; |
1122 | s64 sum_sleep_runtime; | 1122 | s64 sum_sleep_runtime; |
1123 | 1123 | ||
1124 | u64 block_start; | 1124 | u64 block_start; |
1125 | u64 block_max; | 1125 | u64 block_max; |
1126 | u64 exec_max; | 1126 | u64 exec_max; |
1127 | u64 slice_max; | 1127 | u64 slice_max; |
1128 | 1128 | ||
1129 | u64 nr_migrations_cold; | 1129 | u64 nr_migrations_cold; |
1130 | u64 nr_failed_migrations_affine; | 1130 | u64 nr_failed_migrations_affine; |
1131 | u64 nr_failed_migrations_running; | 1131 | u64 nr_failed_migrations_running; |
1132 | u64 nr_failed_migrations_hot; | 1132 | u64 nr_failed_migrations_hot; |
1133 | u64 nr_forced_migrations; | 1133 | u64 nr_forced_migrations; |
1134 | 1134 | ||
1135 | u64 nr_wakeups; | 1135 | u64 nr_wakeups; |
1136 | u64 nr_wakeups_sync; | 1136 | u64 nr_wakeups_sync; |
1137 | u64 nr_wakeups_migrate; | 1137 | u64 nr_wakeups_migrate; |
1138 | u64 nr_wakeups_local; | 1138 | u64 nr_wakeups_local; |
1139 | u64 nr_wakeups_remote; | 1139 | u64 nr_wakeups_remote; |
1140 | u64 nr_wakeups_affine; | 1140 | u64 nr_wakeups_affine; |
1141 | u64 nr_wakeups_affine_attempts; | 1141 | u64 nr_wakeups_affine_attempts; |
1142 | u64 nr_wakeups_passive; | 1142 | u64 nr_wakeups_passive; |
1143 | u64 nr_wakeups_idle; | 1143 | u64 nr_wakeups_idle; |
1144 | }; | 1144 | }; |
1145 | #endif | 1145 | #endif |
1146 | 1146 | ||
1147 | struct sched_entity { | 1147 | struct sched_entity { |
1148 | struct load_weight load; /* for load-balancing */ | 1148 | struct load_weight load; /* for load-balancing */ |
1149 | struct rb_node run_node; | 1149 | struct rb_node run_node; |
1150 | struct list_head group_node; | 1150 | struct list_head group_node; |
1151 | unsigned int on_rq; | 1151 | unsigned int on_rq; |
1152 | 1152 | ||
1153 | u64 exec_start; | 1153 | u64 exec_start; |
1154 | u64 sum_exec_runtime; | 1154 | u64 sum_exec_runtime; |
1155 | u64 vruntime; | 1155 | u64 vruntime; |
1156 | u64 prev_sum_exec_runtime; | 1156 | u64 prev_sum_exec_runtime; |
1157 | 1157 | ||
1158 | u64 nr_migrations; | 1158 | u64 nr_migrations; |
1159 | 1159 | ||
1160 | #ifdef CONFIG_SCHEDSTATS | 1160 | #ifdef CONFIG_SCHEDSTATS |
1161 | struct sched_statistics statistics; | 1161 | struct sched_statistics statistics; |
1162 | #endif | 1162 | #endif |
1163 | 1163 | ||
1164 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1164 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1165 | struct sched_entity *parent; | 1165 | struct sched_entity *parent; |
1166 | /* rq on which this entity is (to be) queued: */ | 1166 | /* rq on which this entity is (to be) queued: */ |
1167 | struct cfs_rq *cfs_rq; | 1167 | struct cfs_rq *cfs_rq; |
1168 | /* rq "owned" by this entity/group: */ | 1168 | /* rq "owned" by this entity/group: */ |
1169 | struct cfs_rq *my_q; | 1169 | struct cfs_rq *my_q; |
1170 | #endif | 1170 | #endif |
1171 | /* | 1171 | /* |
1172 | * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be | 1172 | * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be |
1173 | * removed when useful for applications beyond shares distribution (e.g. | 1173 | * removed when useful for applications beyond shares distribution (e.g. |
1174 | * load-balance). | 1174 | * load-balance). |
1175 | */ | 1175 | */ |
1176 | #if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) | 1176 | #if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) |
1177 | /* Per-entity load-tracking */ | 1177 | /* Per-entity load-tracking */ |
1178 | struct sched_avg avg; | 1178 | struct sched_avg avg; |
1179 | #endif | 1179 | #endif |
1180 | }; | 1180 | }; |
1181 | 1181 | ||
1182 | struct sched_rt_entity { | 1182 | struct sched_rt_entity { |
1183 | struct list_head run_list; | 1183 | struct list_head run_list; |
1184 | unsigned long timeout; | 1184 | unsigned long timeout; |
1185 | unsigned int time_slice; | 1185 | unsigned int time_slice; |
1186 | 1186 | ||
1187 | struct sched_rt_entity *back; | 1187 | struct sched_rt_entity *back; |
1188 | #ifdef CONFIG_RT_GROUP_SCHED | 1188 | #ifdef CONFIG_RT_GROUP_SCHED |
1189 | struct sched_rt_entity *parent; | 1189 | struct sched_rt_entity *parent; |
1190 | /* rq on which this entity is (to be) queued: */ | 1190 | /* rq on which this entity is (to be) queued: */ |
1191 | struct rt_rq *rt_rq; | 1191 | struct rt_rq *rt_rq; |
1192 | /* rq "owned" by this entity/group: */ | 1192 | /* rq "owned" by this entity/group: */ |
1193 | struct rt_rq *my_q; | 1193 | struct rt_rq *my_q; |
1194 | #endif | 1194 | #endif |
1195 | }; | 1195 | }; |
1196 | 1196 | ||
1197 | /* | 1197 | /* |
1198 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). | 1198 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). |
1199 | * Timeslices get refilled after they expire. | 1199 | * Timeslices get refilled after they expire. |
1200 | */ | 1200 | */ |
1201 | #define RR_TIMESLICE (100 * HZ / 1000) | 1201 | #define RR_TIMESLICE (100 * HZ / 1000) |
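Worked out for common HZ values, the define always corresponds to the same 100 ms wall-clock slice:

    HZ = 1000:  100 * 1000 / 1000 = 100 jiffies
    HZ =  250:  100 *  250 / 1000 =  25 jiffies
    HZ =  100:  100 *  100 / 1000 =  10 jiffies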
1202 | 1202 | ||
1203 | struct rcu_node; | 1203 | struct rcu_node; |
1204 | 1204 | ||
1205 | enum perf_event_task_context { | 1205 | enum perf_event_task_context { |
1206 | perf_invalid_context = -1, | 1206 | perf_invalid_context = -1, |
1207 | perf_hw_context = 0, | 1207 | perf_hw_context = 0, |
1208 | perf_sw_context, | 1208 | perf_sw_context, |
1209 | perf_nr_task_contexts, | 1209 | perf_nr_task_contexts, |
1210 | }; | 1210 | }; |
1211 | 1211 | ||
1212 | struct task_struct { | 1212 | struct task_struct { |
1213 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 1213 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
1214 | void *stack; | 1214 | void *stack; |
1215 | atomic_t usage; | 1215 | atomic_t usage; |
1216 | unsigned int flags; /* per process flags, defined below */ | 1216 | unsigned int flags; /* per process flags, defined below */ |
1217 | unsigned int ptrace; | 1217 | unsigned int ptrace; |
1218 | 1218 | ||
1219 | #ifdef CONFIG_SMP | 1219 | #ifdef CONFIG_SMP |
1220 | struct llist_node wake_entry; | 1220 | struct llist_node wake_entry; |
1221 | int on_cpu; | 1221 | int on_cpu; |
1222 | #endif | 1222 | #endif |
1223 | int on_rq; | 1223 | int on_rq; |
1224 | 1224 | ||
1225 | int prio, static_prio, normal_prio; | 1225 | int prio, static_prio, normal_prio; |
1226 | unsigned int rt_priority; | 1226 | unsigned int rt_priority; |
1227 | const struct sched_class *sched_class; | 1227 | const struct sched_class *sched_class; |
1228 | struct sched_entity se; | 1228 | struct sched_entity se; |
1229 | struct sched_rt_entity rt; | 1229 | struct sched_rt_entity rt; |
1230 | #ifdef CONFIG_CGROUP_SCHED | 1230 | #ifdef CONFIG_CGROUP_SCHED |
1231 | struct task_group *sched_task_group; | 1231 | struct task_group *sched_task_group; |
1232 | #endif | 1232 | #endif |
1233 | 1233 | ||
1234 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 1234 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
1235 | /* list of struct preempt_notifier: */ | 1235 | /* list of struct preempt_notifier: */ |
1236 | struct hlist_head preempt_notifiers; | 1236 | struct hlist_head preempt_notifiers; |
1237 | #endif | 1237 | #endif |
1238 | 1238 | ||
1239 | /* | 1239 | /* |
1240 | * fpu_counter contains the number of consecutive context switches | 1240 | * fpu_counter contains the number of consecutive context switches |
1241 | * that the FPU is used. If this is over a threshold, the lazy fpu | 1241 | * that the FPU is used. If this is over a threshold, the lazy fpu |
1242 | * saving becomes unlazy to save the trap. This is an unsigned char | 1242 | * saving becomes unlazy to save the trap. This is an unsigned char |
1243 | * so that after 256 times the counter wraps and the behavior turns | 1243 | * so that after 256 times the counter wraps and the behavior turns |
1244 | * lazy again; this to deal with bursty apps that only use FPU for | 1244 | * lazy again; this to deal with bursty apps that only use FPU for |
1245 | * a short time | 1245 | * a short time |
1246 | */ | 1246 | */ |
1247 | unsigned char fpu_counter; | 1247 | unsigned char fpu_counter; |
1248 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 1248 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
1249 | unsigned int btrace_seq; | 1249 | unsigned int btrace_seq; |
1250 | #endif | 1250 | #endif |
1251 | 1251 | ||
1252 | unsigned int policy; | 1252 | unsigned int policy; |
1253 | int nr_cpus_allowed; | 1253 | int nr_cpus_allowed; |
1254 | cpumask_t cpus_allowed; | 1254 | cpumask_t cpus_allowed; |
1255 | 1255 | ||
1256 | #ifdef CONFIG_PREEMPT_RCU | 1256 | #ifdef CONFIG_PREEMPT_RCU |
1257 | int rcu_read_lock_nesting; | 1257 | int rcu_read_lock_nesting; |
1258 | char rcu_read_unlock_special; | 1258 | char rcu_read_unlock_special; |
1259 | struct list_head rcu_node_entry; | 1259 | struct list_head rcu_node_entry; |
1260 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | 1260 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
1261 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1261 | #ifdef CONFIG_TREE_PREEMPT_RCU |
1262 | struct rcu_node *rcu_blocked_node; | 1262 | struct rcu_node *rcu_blocked_node; |
1263 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1263 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1264 | #ifdef CONFIG_RCU_BOOST | 1264 | #ifdef CONFIG_RCU_BOOST |
1265 | struct rt_mutex *rcu_boost_mutex; | 1265 | struct rt_mutex *rcu_boost_mutex; |
1266 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 1266 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
1267 | 1267 | ||
1268 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 1268 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
1269 | struct sched_info sched_info; | 1269 | struct sched_info sched_info; |
1270 | #endif | 1270 | #endif |
1271 | 1271 | ||
1272 | struct list_head tasks; | 1272 | struct list_head tasks; |
1273 | #ifdef CONFIG_SMP | 1273 | #ifdef CONFIG_SMP |
1274 | struct plist_node pushable_tasks; | 1274 | struct plist_node pushable_tasks; |
1275 | #endif | 1275 | #endif |
1276 | 1276 | ||
1277 | struct mm_struct *mm, *active_mm; | 1277 | struct mm_struct *mm, *active_mm; |
1278 | #ifdef CONFIG_COMPAT_BRK | 1278 | #ifdef CONFIG_COMPAT_BRK |
1279 | unsigned brk_randomized:1; | 1279 | unsigned brk_randomized:1; |
1280 | #endif | 1280 | #endif |
1281 | #if defined(SPLIT_RSS_COUNTING) | 1281 | #if defined(SPLIT_RSS_COUNTING) |
1282 | struct task_rss_stat rss_stat; | 1282 | struct task_rss_stat rss_stat; |
1283 | #endif | 1283 | #endif |
1284 | /* task state */ | 1284 | /* task state */ |
1285 | int exit_state; | 1285 | int exit_state; |
1286 | int exit_code, exit_signal; | 1286 | int exit_code, exit_signal; |
1287 | int pdeath_signal; /* The signal sent when the parent dies */ | 1287 | int pdeath_signal; /* The signal sent when the parent dies */ |
1288 | unsigned int jobctl; /* JOBCTL_*, siglock protected */ | 1288 | unsigned int jobctl; /* JOBCTL_*, siglock protected */ |
1289 | /* ??? */ | 1289 | /* ??? */ |
1290 | unsigned int personality; | 1290 | unsigned int personality; |
1291 | unsigned did_exec:1; | 1291 | unsigned did_exec:1; |
1292 | unsigned in_execve:1; /* Tell the LSMs that the process is doing an | 1292 | unsigned in_execve:1; /* Tell the LSMs that the process is doing an |
1293 | * execve */ | 1293 | * execve */ |
1294 | unsigned in_iowait:1; | 1294 | unsigned in_iowait:1; |
1295 | 1295 | ||
1296 | /* task may not gain privileges */ | 1296 | /* task may not gain privileges */ |
1297 | unsigned no_new_privs:1; | 1297 | unsigned no_new_privs:1; |
1298 | 1298 | ||
1299 | /* Revert to default priority/policy when forking */ | 1299 | /* Revert to default priority/policy when forking */ |
1300 | unsigned sched_reset_on_fork:1; | 1300 | unsigned sched_reset_on_fork:1; |
1301 | unsigned sched_contributes_to_load:1; | 1301 | unsigned sched_contributes_to_load:1; |
1302 | 1302 | ||
1303 | pid_t pid; | 1303 | pid_t pid; |
1304 | pid_t tgid; | 1304 | pid_t tgid; |
1305 | 1305 | ||
1306 | #ifdef CONFIG_CC_STACKPROTECTOR | 1306 | #ifdef CONFIG_CC_STACKPROTECTOR |
1307 | /* Canary value for the -fstack-protector gcc feature */ | 1307 | /* Canary value for the -fstack-protector gcc feature */ |
1308 | unsigned long stack_canary; | 1308 | unsigned long stack_canary; |
1309 | #endif | 1309 | #endif |
1310 | /* | 1310 | /* |
1311 | * pointers to (original) parent process, youngest child, younger sibling, | 1311 | * pointers to (original) parent process, youngest child, younger sibling, |
1312 | * older sibling, respectively. (p->father can be replaced with | 1312 | * older sibling, respectively. (p->father can be replaced with |
1313 | * p->real_parent->pid) | 1313 | * p->real_parent->pid) |
1314 | */ | 1314 | */ |
1315 | struct task_struct __rcu *real_parent; /* real parent process */ | 1315 | struct task_struct __rcu *real_parent; /* real parent process */ |
1316 | struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ | 1316 | struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ |
1317 | /* | 1317 | /* |
1318 | * children/sibling forms the list of my natural children | 1318 | * children/sibling forms the list of my natural children |
1319 | */ | 1319 | */ |
1320 | struct list_head children; /* list of my children */ | 1320 | struct list_head children; /* list of my children */ |
1321 | struct list_head sibling; /* linkage in my parent's children list */ | 1321 | struct list_head sibling; /* linkage in my parent's children list */ |
1322 | struct task_struct *group_leader; /* threadgroup leader */ | 1322 | struct task_struct *group_leader; /* threadgroup leader */ |
1323 | 1323 | ||
1324 | /* | 1324 | /* |
1325 | * ptraced is the list of tasks this task is using ptrace on. | 1325 | * ptraced is the list of tasks this task is using ptrace on. |
1326 | * This includes both natural children and PTRACE_ATTACH targets. | 1326 | * This includes both natural children and PTRACE_ATTACH targets. |
1327 | * p->ptrace_entry is p's link on the p->parent->ptraced list. | 1327 | * p->ptrace_entry is p's link on the p->parent->ptraced list. |
1328 | */ | 1328 | */ |
1329 | struct list_head ptraced; | 1329 | struct list_head ptraced; |
1330 | struct list_head ptrace_entry; | 1330 | struct list_head ptrace_entry; |
1331 | 1331 | ||
1332 | /* PID/PID hash table linkage. */ | 1332 | /* PID/PID hash table linkage. */ |
1333 | struct pid_link pids[PIDTYPE_MAX]; | 1333 | struct pid_link pids[PIDTYPE_MAX]; |
1334 | struct list_head thread_group; | 1334 | struct list_head thread_group; |
1335 | 1335 | ||
1336 | struct completion *vfork_done; /* for vfork() */ | 1336 | struct completion *vfork_done; /* for vfork() */ |
1337 | int __user *set_child_tid; /* CLONE_CHILD_SETTID */ | 1337 | int __user *set_child_tid; /* CLONE_CHILD_SETTID */ |
1338 | int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ | 1338 | int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ |
1339 | 1339 | ||
1340 | cputime_t utime, stime, utimescaled, stimescaled; | 1340 | cputime_t utime, stime, utimescaled, stimescaled; |
1341 | cputime_t gtime; | 1341 | cputime_t gtime; |
1342 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 1342 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
1343 | cputime_t prev_utime, prev_stime; | 1343 | cputime_t prev_utime, prev_stime; |
1344 | #endif | 1344 | #endif |
1345 | unsigned long nvcsw, nivcsw; /* context switch counts */ | 1345 | unsigned long nvcsw, nivcsw; /* context switch counts */ |
1346 | struct timespec start_time; /* monotonic time */ | 1346 | struct timespec start_time; /* monotonic time */ |
1347 | struct timespec real_start_time; /* boot based time */ | 1347 | struct timespec real_start_time; /* boot based time */ |
1348 | /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ | 1348 | /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ |
1349 | unsigned long min_flt, maj_flt; | 1349 | unsigned long min_flt, maj_flt; |
1350 | 1350 | ||
1351 | struct task_cputime cputime_expires; | 1351 | struct task_cputime cputime_expires; |
1352 | struct list_head cpu_timers[3]; | 1352 | struct list_head cpu_timers[3]; |
1353 | 1353 | ||
1354 | /* process credentials */ | 1354 | /* process credentials */ |
1355 | const struct cred __rcu *real_cred; /* objective and real subjective task | 1355 | const struct cred __rcu *real_cred; /* objective and real subjective task |
1356 | * credentials (COW) */ | 1356 | * credentials (COW) */ |
1357 | const struct cred __rcu *cred; /* effective (overridable) subjective task | 1357 | const struct cred __rcu *cred; /* effective (overridable) subjective task |
1358 | * credentials (COW) */ | 1358 | * credentials (COW) */ |
1359 | char comm[TASK_COMM_LEN]; /* executable name excluding path | 1359 | char comm[TASK_COMM_LEN]; /* executable name excluding path |
1360 | - access with [gs]et_task_comm (which lock | 1360 | - access with [gs]et_task_comm (which lock |
1361 | it with task_lock()) | 1361 | it with task_lock()) |
1362 | - initialized normally by setup_new_exec */ | 1362 | - initialized normally by setup_new_exec */ |
1363 | /* file system info */ | 1363 | /* file system info */ |
1364 | int link_count, total_link_count; | 1364 | int link_count, total_link_count; |
1365 | #ifdef CONFIG_SYSVIPC | 1365 | #ifdef CONFIG_SYSVIPC |
1366 | /* ipc stuff */ | 1366 | /* ipc stuff */ |
1367 | struct sysv_sem sysvsem; | 1367 | struct sysv_sem sysvsem; |
1368 | #endif | 1368 | #endif |
1369 | #ifdef CONFIG_DETECT_HUNG_TASK | 1369 | #ifdef CONFIG_DETECT_HUNG_TASK |
1370 | /* hung task detection */ | 1370 | /* hung task detection */ |
1371 | unsigned long last_switch_count; | 1371 | unsigned long last_switch_count; |
1372 | #endif | 1372 | #endif |
1373 | /* CPU-specific state of this task */ | 1373 | /* CPU-specific state of this task */ |
1374 | struct thread_struct thread; | 1374 | struct thread_struct thread; |
1375 | /* filesystem information */ | 1375 | /* filesystem information */ |
1376 | struct fs_struct *fs; | 1376 | struct fs_struct *fs; |
1377 | /* open file information */ | 1377 | /* open file information */ |
1378 | struct files_struct *files; | 1378 | struct files_struct *files; |
1379 | /* namespaces */ | 1379 | /* namespaces */ |
1380 | struct nsproxy *nsproxy; | 1380 | struct nsproxy *nsproxy; |
1381 | /* signal handlers */ | 1381 | /* signal handlers */ |
1382 | struct signal_struct *signal; | 1382 | struct signal_struct *signal; |
1383 | struct sighand_struct *sighand; | 1383 | struct sighand_struct *sighand; |
1384 | 1384 | ||
1385 | sigset_t blocked, real_blocked; | 1385 | sigset_t blocked, real_blocked; |
1386 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ | 1386 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ |
1387 | struct sigpending pending; | 1387 | struct sigpending pending; |
1388 | 1388 | ||
1389 | unsigned long sas_ss_sp; | 1389 | unsigned long sas_ss_sp; |
1390 | size_t sas_ss_size; | 1390 | size_t sas_ss_size; |
1391 | int (*notifier)(void *priv); | 1391 | int (*notifier)(void *priv); |
1392 | void *notifier_data; | 1392 | void *notifier_data; |
1393 | sigset_t *notifier_mask; | 1393 | sigset_t *notifier_mask; |
1394 | struct callback_head *task_works; | 1394 | struct callback_head *task_works; |
1395 | 1395 | ||
1396 | struct audit_context *audit_context; | 1396 | struct audit_context *audit_context; |
1397 | #ifdef CONFIG_AUDITSYSCALL | 1397 | #ifdef CONFIG_AUDITSYSCALL |
1398 | kuid_t loginuid; | 1398 | kuid_t loginuid; |
1399 | unsigned int sessionid; | 1399 | unsigned int sessionid; |
1400 | #endif | 1400 | #endif |
1401 | struct seccomp seccomp; | 1401 | struct seccomp seccomp; |
1402 | 1402 | ||
1403 | /* Thread group tracking */ | 1403 | /* Thread group tracking */ |
1404 | u32 parent_exec_id; | 1404 | u32 parent_exec_id; |
1405 | u32 self_exec_id; | 1405 | u32 self_exec_id; |
1406 | /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, | 1406 | /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, |
1407 | * mempolicy */ | 1407 | * mempolicy */ |
1408 | spinlock_t alloc_lock; | 1408 | spinlock_t alloc_lock; |
1409 | 1409 | ||
1410 | /* Protection of the PI data structures: */ | 1410 | /* Protection of the PI data structures: */ |
1411 | raw_spinlock_t pi_lock; | 1411 | raw_spinlock_t pi_lock; |
1412 | 1412 | ||
1413 | #ifdef CONFIG_RT_MUTEXES | 1413 | #ifdef CONFIG_RT_MUTEXES |
1414 | /* PI waiters blocked on a rt_mutex held by this task */ | 1414 | /* PI waiters blocked on a rt_mutex held by this task */ |
1415 | struct plist_head pi_waiters; | 1415 | struct plist_head pi_waiters; |
1416 | /* Deadlock detection and priority inheritance handling */ | 1416 | /* Deadlock detection and priority inheritance handling */ |
1417 | struct rt_mutex_waiter *pi_blocked_on; | 1417 | struct rt_mutex_waiter *pi_blocked_on; |
1418 | #endif | 1418 | #endif |
1419 | 1419 | ||
1420 | #ifdef CONFIG_DEBUG_MUTEXES | 1420 | #ifdef CONFIG_DEBUG_MUTEXES |
1421 | /* mutex deadlock detection */ | 1421 | /* mutex deadlock detection */ |
1422 | struct mutex_waiter *blocked_on; | 1422 | struct mutex_waiter *blocked_on; |
1423 | #endif | 1423 | #endif |
1424 | #ifdef CONFIG_TRACE_IRQFLAGS | 1424 | #ifdef CONFIG_TRACE_IRQFLAGS |
1425 | unsigned int irq_events; | 1425 | unsigned int irq_events; |
1426 | unsigned long hardirq_enable_ip; | 1426 | unsigned long hardirq_enable_ip; |
1427 | unsigned long hardirq_disable_ip; | 1427 | unsigned long hardirq_disable_ip; |
1428 | unsigned int hardirq_enable_event; | 1428 | unsigned int hardirq_enable_event; |
1429 | unsigned int hardirq_disable_event; | 1429 | unsigned int hardirq_disable_event; |
1430 | int hardirqs_enabled; | 1430 | int hardirqs_enabled; |
1431 | int hardirq_context; | 1431 | int hardirq_context; |
1432 | unsigned long softirq_disable_ip; | 1432 | unsigned long softirq_disable_ip; |
1433 | unsigned long softirq_enable_ip; | 1433 | unsigned long softirq_enable_ip; |
1434 | unsigned int softirq_disable_event; | 1434 | unsigned int softirq_disable_event; |
1435 | unsigned int softirq_enable_event; | 1435 | unsigned int softirq_enable_event; |
1436 | int softirqs_enabled; | 1436 | int softirqs_enabled; |
1437 | int softirq_context; | 1437 | int softirq_context; |
1438 | #endif | 1438 | #endif |
1439 | #ifdef CONFIG_LOCKDEP | 1439 | #ifdef CONFIG_LOCKDEP |
1440 | # define MAX_LOCK_DEPTH 48UL | 1440 | # define MAX_LOCK_DEPTH 48UL |
1441 | u64 curr_chain_key; | 1441 | u64 curr_chain_key; |
1442 | int lockdep_depth; | 1442 | int lockdep_depth; |
1443 | unsigned int lockdep_recursion; | 1443 | unsigned int lockdep_recursion; |
1444 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | 1444 | struct held_lock held_locks[MAX_LOCK_DEPTH]; |
1445 | gfp_t lockdep_reclaim_gfp; | 1445 | gfp_t lockdep_reclaim_gfp; |
1446 | #endif | 1446 | #endif |
1447 | 1447 | ||
1448 | /* journalling filesystem info */ | 1448 | /* journalling filesystem info */ |
1449 | void *journal_info; | 1449 | void *journal_info; |
1450 | 1450 | ||
1451 | /* stacked block device info */ | 1451 | /* stacked block device info */ |
1452 | struct bio_list *bio_list; | 1452 | struct bio_list *bio_list; |
1453 | 1453 | ||
1454 | #ifdef CONFIG_BLOCK | 1454 | #ifdef CONFIG_BLOCK |
1455 | /* stack plugging */ | 1455 | /* stack plugging */ |
1456 | struct blk_plug *plug; | 1456 | struct blk_plug *plug; |
1457 | #endif | 1457 | #endif |
1458 | 1458 | ||
1459 | /* VM state */ | 1459 | /* VM state */ |
1460 | struct reclaim_state *reclaim_state; | 1460 | struct reclaim_state *reclaim_state; |
1461 | 1461 | ||
1462 | struct backing_dev_info *backing_dev_info; | 1462 | struct backing_dev_info *backing_dev_info; |
1463 | 1463 | ||
1464 | struct io_context *io_context; | 1464 | struct io_context *io_context; |
1465 | 1465 | ||
1466 | unsigned long ptrace_message; | 1466 | unsigned long ptrace_message; |
1467 | siginfo_t *last_siginfo; /* For ptrace use. */ | 1467 | siginfo_t *last_siginfo; /* For ptrace use. */ |
1468 | struct task_io_accounting ioac; | 1468 | struct task_io_accounting ioac; |
1469 | #if defined(CONFIG_TASK_XACCT) | 1469 | #if defined(CONFIG_TASK_XACCT) |
1470 | u64 acct_rss_mem1; /* accumulated rss usage */ | 1470 | u64 acct_rss_mem1; /* accumulated rss usage */ |
1471 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 1471 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ |
1472 | cputime_t acct_timexpd; /* stime + utime since last update */ | 1472 | cputime_t acct_timexpd; /* stime + utime since last update */ |
1473 | #endif | 1473 | #endif |
1474 | #ifdef CONFIG_CPUSETS | 1474 | #ifdef CONFIG_CPUSETS |
1475 | nodemask_t mems_allowed; /* Protected by alloc_lock */ | 1475 | nodemask_t mems_allowed; /* Protected by alloc_lock */ |
1476 | seqcount_t mems_allowed_seq; /* Sequence number to catch updates */ | 1476 | seqcount_t mems_allowed_seq; /* Sequence number to catch updates */ |
1477 | int cpuset_mem_spread_rotor; | 1477 | int cpuset_mem_spread_rotor; |
1478 | int cpuset_slab_spread_rotor; | 1478 | int cpuset_slab_spread_rotor; |
1479 | #endif | 1479 | #endif |
1480 | #ifdef CONFIG_CGROUPS | 1480 | #ifdef CONFIG_CGROUPS |
1481 | /* Control Group info protected by css_set_lock */ | 1481 | /* Control Group info protected by css_set_lock */ |
1482 | struct css_set __rcu *cgroups; | 1482 | struct css_set __rcu *cgroups; |
1483 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ | 1483 | /* cg_list protected by css_set_lock and tsk->alloc_lock */ |
1484 | struct list_head cg_list; | 1484 | struct list_head cg_list; |
1485 | #endif | 1485 | #endif |
1486 | #ifdef CONFIG_FUTEX | 1486 | #ifdef CONFIG_FUTEX |
1487 | struct robust_list_head __user *robust_list; | 1487 | struct robust_list_head __user *robust_list; |
1488 | #ifdef CONFIG_COMPAT | 1488 | #ifdef CONFIG_COMPAT |
1489 | struct compat_robust_list_head __user *compat_robust_list; | 1489 | struct compat_robust_list_head __user *compat_robust_list; |
1490 | #endif | 1490 | #endif |
1491 | struct list_head pi_state_list; | 1491 | struct list_head pi_state_list; |
1492 | struct futex_pi_state *pi_state_cache; | 1492 | struct futex_pi_state *pi_state_cache; |
1493 | #endif | 1493 | #endif |
1494 | #ifdef CONFIG_PERF_EVENTS | 1494 | #ifdef CONFIG_PERF_EVENTS |
1495 | struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; | 1495 | struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; |
1496 | struct mutex perf_event_mutex; | 1496 | struct mutex perf_event_mutex; |
1497 | struct list_head perf_event_list; | 1497 | struct list_head perf_event_list; |
1498 | #endif | 1498 | #endif |
1499 | #ifdef CONFIG_NUMA | 1499 | #ifdef CONFIG_NUMA |
1500 | struct mempolicy *mempolicy; /* Protected by alloc_lock */ | 1500 | struct mempolicy *mempolicy; /* Protected by alloc_lock */ |
1501 | short il_next; | 1501 | short il_next; |
1502 | short pref_node_fork; | 1502 | short pref_node_fork; |
1503 | #endif | 1503 | #endif |
1504 | struct rcu_head rcu; | 1504 | struct rcu_head rcu; |
1505 | 1505 | ||
1506 | /* | 1506 | /* |
1507 | * cache last used pipe for splice | 1507 | * cache last used pipe for splice |
1508 | */ | 1508 | */ |
1509 | struct pipe_inode_info *splice_pipe; | 1509 | struct pipe_inode_info *splice_pipe; |
1510 | 1510 | ||
1511 | struct page_frag task_frag; | 1511 | struct page_frag task_frag; |
1512 | 1512 | ||
1513 | #ifdef CONFIG_TASK_DELAY_ACCT | 1513 | #ifdef CONFIG_TASK_DELAY_ACCT |
1514 | struct task_delay_info *delays; | 1514 | struct task_delay_info *delays; |
1515 | #endif | 1515 | #endif |
1516 | #ifdef CONFIG_FAULT_INJECTION | 1516 | #ifdef CONFIG_FAULT_INJECTION |
1517 | int make_it_fail; | 1517 | int make_it_fail; |
1518 | #endif | 1518 | #endif |
1519 | /* | 1519 | /* |
1520 | * when (nr_dirtied >= nr_dirtied_pause), it's time to call | 1520 | * when (nr_dirtied >= nr_dirtied_pause), it's time to call |
1521 | * balance_dirty_pages() for some dirty throttling pause | 1521 | * balance_dirty_pages() for some dirty throttling pause |
1522 | */ | 1522 | */ |
1523 | int nr_dirtied; | 1523 | int nr_dirtied; |
1524 | int nr_dirtied_pause; | 1524 | int nr_dirtied_pause; |
1525 | unsigned long dirty_paused_when; /* start of a write-and-pause period */ | 1525 | unsigned long dirty_paused_when; /* start of a write-and-pause period */ |
1526 | 1526 | ||
1527 | #ifdef CONFIG_LATENCYTOP | 1527 | #ifdef CONFIG_LATENCYTOP |
1528 | int latency_record_count; | 1528 | int latency_record_count; |
1529 | struct latency_record latency_record[LT_SAVECOUNT]; | 1529 | struct latency_record latency_record[LT_SAVECOUNT]; |
1530 | #endif | 1530 | #endif |
1531 | /* | 1531 | /* |
1532 | * time slack values; these are used to round up poll() and | 1532 | * time slack values; these are used to round up poll() and |
1533 | * select() etc timeout values. These are in nanoseconds. | 1533 | * select() etc timeout values. These are in nanoseconds. |
1534 | */ | 1534 | */ |
1535 | unsigned long timer_slack_ns; | 1535 | unsigned long timer_slack_ns; |
1536 | unsigned long default_timer_slack_ns; | 1536 | unsigned long default_timer_slack_ns; |
1537 | 1537 | ||
1538 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1538 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1539 | /* Index of current stored address in ret_stack */ | 1539 | /* Index of current stored address in ret_stack */ |
1540 | int curr_ret_stack; | 1540 | int curr_ret_stack; |
1541 | /* Stack of return addresses for return function tracing */ | 1541 | /* Stack of return addresses for return function tracing */ |
1542 | struct ftrace_ret_stack *ret_stack; | 1542 | struct ftrace_ret_stack *ret_stack; |
1543 | /* time stamp for last schedule */ | 1543 | /* time stamp for last schedule */ |
1544 | unsigned long long ftrace_timestamp; | 1544 | unsigned long long ftrace_timestamp; |
1545 | /* | 1545 | /* |
1546 | * Number of functions that haven't been traced | 1546 | * Number of functions that haven't been traced |
1547 | * because of depth overrun. | 1547 | * because of depth overrun. |
1548 | */ | 1548 | */ |
1549 | atomic_t trace_overrun; | 1549 | atomic_t trace_overrun; |
1550 | /* Pause for the tracing */ | 1550 | /* Pause for the tracing */ |
1551 | atomic_t tracing_graph_pause; | 1551 | atomic_t tracing_graph_pause; |
1552 | #endif | 1552 | #endif |
1553 | #ifdef CONFIG_TRACING | 1553 | #ifdef CONFIG_TRACING |
1554 | /* state flags for use by tracers */ | 1554 | /* state flags for use by tracers */ |
1555 | unsigned long trace; | 1555 | unsigned long trace; |
1556 | /* bitmask and counter of trace recursion */ | 1556 | /* bitmask and counter of trace recursion */ |
1557 | unsigned long trace_recursion; | 1557 | unsigned long trace_recursion; |
1558 | #endif /* CONFIG_TRACING */ | 1558 | #endif /* CONFIG_TRACING */ |
1559 | #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ | 1559 | #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ |
1560 | struct memcg_batch_info { | 1560 | struct memcg_batch_info { |
1561 | int do_batch; /* incremented when batch uncharge started */ | 1561 | int do_batch; /* incremented when batch uncharge started */ |
1562 | struct mem_cgroup *memcg; /* target memcg of uncharge */ | 1562 | struct mem_cgroup *memcg; /* target memcg of uncharge */ |
1563 | unsigned long nr_pages; /* uncharged usage */ | 1563 | unsigned long nr_pages; /* uncharged usage */ |
1564 | unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ | 1564 | unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ |
1565 | } memcg_batch; | 1565 | } memcg_batch; |
1566 | #endif | 1566 | #endif |
1567 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 1567 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
1568 | atomic_t ptrace_bp_refcnt; | 1568 | atomic_t ptrace_bp_refcnt; |
1569 | #endif | 1569 | #endif |
1570 | #ifdef CONFIG_UPROBES | 1570 | #ifdef CONFIG_UPROBES |
1571 | struct uprobe_task *utask; | 1571 | struct uprobe_task *utask; |
1572 | #endif | 1572 | #endif |
1573 | }; | 1573 | }; |
1574 | 1574 | ||
1575 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ | 1575 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ |
1576 | #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | 1576 | #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) |
1577 | 1577 | ||
1578 | /* | 1578 | /* |
1579 | * Priority of a process goes from 0..MAX_PRIO-1, valid RT | 1579 | * Priority of a process goes from 0..MAX_PRIO-1, valid RT |
1580 | * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH | 1580 | * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH |
1581 | * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority | 1581 | * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority |
1582 | * values are inverted: lower p->prio value means higher priority. | 1582 | * values are inverted: lower p->prio value means higher priority. |
1583 | * | 1583 | * |
1584 | * The MAX_USER_RT_PRIO value allows the actual maximum | 1584 | * The MAX_USER_RT_PRIO value allows the actual maximum |
1585 | * RT priority to be separate from the value exported to | 1585 | * RT priority to be separate from the value exported to |
1586 | * user-space. This allows kernel threads to set their | 1586 | * user-space. This allows kernel threads to set their |
1587 | * priority to a value higher than any user task. Note: | 1587 | * priority to a value higher than any user task. Note: |
1588 | * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. | 1588 | * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. |
1589 | */ | 1589 | */ |
1590 | 1590 | ||
1591 | #define MAX_USER_RT_PRIO 100 | 1591 | #define MAX_USER_RT_PRIO 100 |
1592 | #define MAX_RT_PRIO MAX_USER_RT_PRIO | 1592 | #define MAX_RT_PRIO MAX_USER_RT_PRIO |
1593 | 1593 | ||
1594 | #define MAX_PRIO (MAX_RT_PRIO + 40) | 1594 | #define MAX_PRIO (MAX_RT_PRIO + 40) |
1595 | #define DEFAULT_PRIO (MAX_RT_PRIO + 20) | 1595 | #define DEFAULT_PRIO (MAX_RT_PRIO + 20) |
1596 | 1596 | ||
1597 | static inline int rt_prio(int prio) | 1597 | static inline int rt_prio(int prio) |
1598 | { | 1598 | { |
1599 | if (unlikely(prio < MAX_RT_PRIO)) | 1599 | if (unlikely(prio < MAX_RT_PRIO)) |
1600 | return 1; | 1600 | return 1; |
1601 | return 0; | 1601 | return 0; |
1602 | } | 1602 | } |
1603 | 1603 | ||
1604 | static inline int rt_task(struct task_struct *p) | 1604 | static inline int rt_task(struct task_struct *p) |
1605 | { | 1605 | { |
1606 | return rt_prio(p->prio); | 1606 | return rt_prio(p->prio); |
1607 | } | 1607 | } |
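With the constants above, the ranges work out as follows (the nice mapping shown is the one used elsewhere in the scheduler, prio = DEFAULT_PRIO + nice):

    RT tasks:            p->prio   0 ..  99   (rt_prio() returns 1)
    SCHED_NORMAL/BATCH:  p->prio 100 .. 139   (nice -20 .. +19)
    nice 0:              p->prio 120          (DEFAULT_PRIO)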
1608 | 1608 | ||
1609 | static inline struct pid *task_pid(struct task_struct *task) | 1609 | static inline struct pid *task_pid(struct task_struct *task) |
1610 | { | 1610 | { |
1611 | return task->pids[PIDTYPE_PID].pid; | 1611 | return task->pids[PIDTYPE_PID].pid; |
1612 | } | 1612 | } |
1613 | 1613 | ||
1614 | static inline struct pid *task_tgid(struct task_struct *task) | 1614 | static inline struct pid *task_tgid(struct task_struct *task) |
1615 | { | 1615 | { |
1616 | return task->group_leader->pids[PIDTYPE_PID].pid; | 1616 | return task->group_leader->pids[PIDTYPE_PID].pid; |
1617 | } | 1617 | } |
1618 | 1618 | ||
1619 | /* | 1619 | /* |
1620 | * Without tasklist or rcu lock it is not safe to dereference | 1620 | * Without tasklist or rcu lock it is not safe to dereference |
1621 | * the result of task_pgrp/task_session even if task == current, | 1621 | * the result of task_pgrp/task_session even if task == current, |
1622 | * we can race with another thread doing sys_setsid/sys_setpgid. | 1622 | * we can race with another thread doing sys_setsid/sys_setpgid. |
1623 | */ | 1623 | */ |
1624 | static inline struct pid *task_pgrp(struct task_struct *task) | 1624 | static inline struct pid *task_pgrp(struct task_struct *task) |
1625 | { | 1625 | { |
1626 | return task->group_leader->pids[PIDTYPE_PGID].pid; | 1626 | return task->group_leader->pids[PIDTYPE_PGID].pid; |
1627 | } | 1627 | } |
1628 | 1628 | ||
1629 | static inline struct pid *task_session(struct task_struct *task) | 1629 | static inline struct pid *task_session(struct task_struct *task) |
1630 | { | 1630 | { |
1631 | return task->group_leader->pids[PIDTYPE_SID].pid; | 1631 | return task->group_leader->pids[PIDTYPE_SID].pid; |
1632 | } | 1632 | } |
1633 | 1633 | ||
1634 | struct pid_namespace; | 1634 | struct pid_namespace; |
1635 | 1635 | ||
1636 | /* | 1636 | /* |
1637 | * the helpers to get the task's different pids as they are seen | 1637 | * the helpers to get the task's different pids as they are seen |
1638 | * from various namespaces | 1638 | * from various namespaces |
1639 | * | 1639 | * |
1640 | * task_xid_nr() : global id, i.e. the id seen from the init namespace; | 1640 | * task_xid_nr() : global id, i.e. the id seen from the init namespace; |
1641 | * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of | 1641 | * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of |
1642 | * current. | 1642 | * current. |
1643 | * task_xid_nr_ns() : id seen from the ns specified; | 1643 | * task_xid_nr_ns() : id seen from the ns specified; |
1644 | * | 1644 | * |
1645 | * set_task_vxid() : assigns a virtual id to a task; | 1645 | * set_task_vxid() : assigns a virtual id to a task; |
1646 | * | 1646 | * |
1647 | * see also pid_nr() etc in include/linux/pid.h | 1647 | * see also pid_nr() etc in include/linux/pid.h |
1648 | */ | 1648 | */ |
1649 | pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, | 1649 | pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, |
1650 | struct pid_namespace *ns); | 1650 | struct pid_namespace *ns); |
1651 | 1651 | ||
1652 | static inline pid_t task_pid_nr(struct task_struct *tsk) | 1652 | static inline pid_t task_pid_nr(struct task_struct *tsk) |
1653 | { | 1653 | { |
1654 | return tsk->pid; | 1654 | return tsk->pid; |
1655 | } | 1655 | } |
1656 | 1656 | ||
1657 | static inline pid_t task_pid_nr_ns(struct task_struct *tsk, | 1657 | static inline pid_t task_pid_nr_ns(struct task_struct *tsk, |
1658 | struct pid_namespace *ns) | 1658 | struct pid_namespace *ns) |
1659 | { | 1659 | { |
1660 | return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); | 1660 | return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); |
1661 | } | 1661 | } |
1662 | 1662 | ||
1663 | static inline pid_t task_pid_vnr(struct task_struct *tsk) | 1663 | static inline pid_t task_pid_vnr(struct task_struct *tsk) |
1664 | { | 1664 | { |
1665 | return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); | 1665 | return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); |
1666 | } | 1666 | } |
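An illustrative contrast between the global and virtual flavours (the values are examples only):

    pid_t global = task_pid_nr(current);   /* id as seen from the init pid namespace */
    pid_t local  = task_pid_vnr(current);  /* id as seen from current's own namespace */

    /* Inside a container the same task can be, say, global 4211 but local 1. */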
1667 | 1667 | ||
1668 | 1668 | ||
1669 | static inline pid_t task_tgid_nr(struct task_struct *tsk) | 1669 | static inline pid_t task_tgid_nr(struct task_struct *tsk) |
1670 | { | 1670 | { |
1671 | return tsk->tgid; | 1671 | return tsk->tgid; |
1672 | } | 1672 | } |
1673 | 1673 | ||
1674 | pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); | 1674 | pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); |
1675 | 1675 | ||
1676 | static inline pid_t task_tgid_vnr(struct task_struct *tsk) | 1676 | static inline pid_t task_tgid_vnr(struct task_struct *tsk) |
1677 | { | 1677 | { |
1678 | return pid_vnr(task_tgid(tsk)); | 1678 | return pid_vnr(task_tgid(tsk)); |
1679 | } | 1679 | } |
1680 | 1680 | ||
1681 | 1681 | ||
1682 | static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, | 1682 | static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, |
1683 | struct pid_namespace *ns) | 1683 | struct pid_namespace *ns) |
1684 | { | 1684 | { |
1685 | return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); | 1685 | return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); |
1686 | } | 1686 | } |
1687 | 1687 | ||
1688 | static inline pid_t task_pgrp_vnr(struct task_struct *tsk) | 1688 | static inline pid_t task_pgrp_vnr(struct task_struct *tsk) |
1689 | { | 1689 | { |
1690 | return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); | 1690 | return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); |
1691 | } | 1691 | } |
1692 | 1692 | ||
1693 | 1693 | ||
1694 | static inline pid_t task_session_nr_ns(struct task_struct *tsk, | 1694 | static inline pid_t task_session_nr_ns(struct task_struct *tsk, |
1695 | struct pid_namespace *ns) | 1695 | struct pid_namespace *ns) |
1696 | { | 1696 | { |
1697 | return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); | 1697 | return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); |
1698 | } | 1698 | } |
1699 | 1699 | ||
1700 | static inline pid_t task_session_vnr(struct task_struct *tsk) | 1700 | static inline pid_t task_session_vnr(struct task_struct *tsk) |
1701 | { | 1701 | { |
1702 | return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); | 1702 | return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); |
1703 | } | 1703 | } |
1704 | 1704 | ||
1705 | /* obsolete, do not use */ | 1705 | /* obsolete, do not use */ |
1706 | static inline pid_t task_pgrp_nr(struct task_struct *tsk) | 1706 | static inline pid_t task_pgrp_nr(struct task_struct *tsk) |
1707 | { | 1707 | { |
1708 | return task_pgrp_nr_ns(tsk, &init_pid_ns); | 1708 | return task_pgrp_nr_ns(tsk, &init_pid_ns); |
1709 | } | 1709 | } |
1710 | 1710 | ||
1711 | /** | 1711 | /** |
1712 | * pid_alive - check that a task structure is not stale | 1712 | * pid_alive - check that a task structure is not stale |
1713 | * @p: Task structure to be checked. | 1713 | * @p: Task structure to be checked. |
1714 | * | 1714 | * |
1715 | * Test if a process is not yet dead (at most zombie state) | 1715 | * Test if a process is not yet dead (at most zombie state) |
1716 | * If pid_alive fails, then pointers within the task structure | 1716 | * If pid_alive fails, then pointers within the task structure |
1717 | * can be stale and must not be dereferenced. | 1717 | * can be stale and must not be dereferenced. |
1718 | */ | 1718 | */ |
1719 | static inline int pid_alive(struct task_struct *p) | 1719 | static inline int pid_alive(struct task_struct *p) |
1720 | { | 1720 | { |
1721 | return p->pids[PIDTYPE_PID].pid != NULL; | 1721 | return p->pids[PIDTYPE_PID].pid != NULL; |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | /** | 1724 | /** |
1725 | * is_global_init - check if a task structure is init | 1725 | * is_global_init - check if a task structure is init |
1726 | * @tsk: Task structure to be checked. | 1726 | * @tsk: Task structure to be checked. |
1727 | * | 1727 | * |
1728 | * Check if a task structure is the first user space task the kernel created. | 1728 | * Check if a task structure is the first user space task the kernel created. |
1729 | */ | 1729 | */ |
1730 | static inline int is_global_init(struct task_struct *tsk) | 1730 | static inline int is_global_init(struct task_struct *tsk) |
1731 | { | 1731 | { |
1732 | return tsk->pid == 1; | 1732 | return tsk->pid == 1; |
1733 | } | 1733 | } |
1734 | 1734 | ||
1735 | /* | 1735 | /* |
1736 | * is_container_init: | 1736 | * is_container_init: |
1737 | * check whether the task is init in its own pid namespace. | 1737 | * check whether the task is init in its own pid namespace. |
1738 | */ | 1738 | */ |
1739 | extern int is_container_init(struct task_struct *tsk); | 1739 | extern int is_container_init(struct task_struct *tsk); |
1740 | 1740 | ||
1741 | extern struct pid *cad_pid; | 1741 | extern struct pid *cad_pid; |
1742 | 1742 | ||
1743 | extern void free_task(struct task_struct *tsk); | 1743 | extern void free_task(struct task_struct *tsk); |
1744 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) | 1744 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) |
1745 | 1745 | ||
1746 | extern void __put_task_struct(struct task_struct *t); | 1746 | extern void __put_task_struct(struct task_struct *t); |
1747 | 1747 | ||
1748 | static inline void put_task_struct(struct task_struct *t) | 1748 | static inline void put_task_struct(struct task_struct *t) |
1749 | { | 1749 | { |
1750 | if (atomic_dec_and_test(&t->usage)) | 1750 | if (atomic_dec_and_test(&t->usage)) |
1751 | __put_task_struct(t); | 1751 | __put_task_struct(t); |
1752 | } | 1752 | } |
1753 | 1753 | ||
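For illustration only (this helper is hypothetical, not part of the diff): the get/put pair above pins a task_struct so it stays valid while it is being inspected, here together with get_task_comm() declared further down in this header.

#include <linux/sched.h>

/* Hypothetical sketch: hold a reference while copying the task's name.
 * 'buf' must be at least TASK_COMM_LEN bytes. */
static void sample_task_comm(struct task_struct *tsk, char *buf)
{
	get_task_struct(tsk);		/* atomic_inc(&tsk->usage) */
	get_task_comm(buf, tsk);	/* safe even if tsk exits meanwhile */
	put_task_struct(tsk);		/* may free tsk on the last reference */
}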
1754 | extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); | 1754 | extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); |
1755 | extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); | 1755 | extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); |
1756 | 1756 | ||
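A sketch of how a caller uses the renamed helpers (the function and its parameters are illustrative, mirroring the callers updated elsewhere in this commit):

#include <linux/sched.h>

/* Illustrative caller: adjusted (scaled, monotonic) cputime for one task
 * or for its whole thread group. */
static void show_cputimes(struct task_struct *p, int whole_group,
			  cputime_t *ut, cputime_t *st)
{
	if (whole_group)
		thread_group_cputime_adjusted(p, ut, st);
	else
		task_cputime_adjusted(p, ut, st);
}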
1757 | /* | 1757 | /* |
1758 | * Per process flags | 1758 | * Per process flags |
1759 | */ | 1759 | */ |
1760 | #define PF_EXITING 0x00000004 /* getting shut down */ | 1760 | #define PF_EXITING 0x00000004 /* getting shut down */ |
1761 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | 1761 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ |
1762 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | 1762 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ |
1763 | #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ | 1763 | #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ |
1764 | #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ | 1764 | #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ |
1765 | #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ | 1765 | #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ |
1766 | #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ | 1766 | #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ |
1767 | #define PF_DUMPCORE 0x00000200 /* dumped core */ | 1767 | #define PF_DUMPCORE 0x00000200 /* dumped core */ |
1768 | #define PF_SIGNALED 0x00000400 /* killed by a signal */ | 1768 | #define PF_SIGNALED 0x00000400 /* killed by a signal */ |
1769 | #define PF_MEMALLOC 0x00000800 /* Allocating memory */ | 1769 | #define PF_MEMALLOC 0x00000800 /* Allocating memory */ |
1770 | #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ | 1770 | #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ |
1771 | #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ | 1771 | #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ |
1772 | #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ | 1772 | #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ |
1773 | #define PF_FROZEN 0x00010000 /* frozen for system suspend */ | 1773 | #define PF_FROZEN 0x00010000 /* frozen for system suspend */ |
1774 | #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ | 1774 | #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ |
1775 | #define PF_KSWAPD 0x00040000 /* I am kswapd */ | 1775 | #define PF_KSWAPD 0x00040000 /* I am kswapd */ |
1776 | #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ | 1776 | #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ |
1777 | #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ | 1777 | #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ |
1778 | #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ | 1778 | #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ |
1779 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ | 1779 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ |
1780 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ | 1780 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ |
1781 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ | 1781 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ |
1782 | #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ | 1782 | #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ |
1783 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ | 1783 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ |
1784 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ | 1784 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
1785 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ | 1785 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ |
1786 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ | 1786 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ |
1787 | 1787 | ||
1788 | /* | 1788 | /* |
1789 | * Only the _current_ task can read/write to tsk->flags, but other | 1789 | * Only the _current_ task can read/write to tsk->flags, but other |
1790 | * tasks can access tsk->flags in readonly mode for example | 1790 | * tasks can access tsk->flags in readonly mode for example |
1791 | * with tsk_used_math (like during threaded core dumping). | 1791 | * with tsk_used_math (like during threaded core dumping). |
1792 | * There is however an exception to this rule during ptrace | 1792 | * There is however an exception to this rule during ptrace |
1793 | * or during fork: the ptracer task is allowed to write to the | 1793 | * or during fork: the ptracer task is allowed to write to the |
1794 | * child->flags of its traced child (same goes for fork, the parent | 1794 | * child->flags of its traced child (same goes for fork, the parent |
1795 | * can write to the child->flags), because we're guaranteed the | 1795 | * can write to the child->flags), because we're guaranteed the |
1796 | * child is not running and in turn not changing child->flags | 1796 | * child is not running and in turn not changing child->flags |
1797 | * at the same time the parent does it. | 1797 | * at the same time the parent does it. |
1798 | */ | 1798 | */ |
1799 | #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) | 1799 | #define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) |
1800 | #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) | 1800 | #define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) |
1801 | #define clear_used_math() clear_stopped_child_used_math(current) | 1801 | #define clear_used_math() clear_stopped_child_used_math(current) |
1802 | #define set_used_math() set_stopped_child_used_math(current) | 1802 | #define set_used_math() set_stopped_child_used_math(current) |
1803 | #define conditional_stopped_child_used_math(condition, child) \ | 1803 | #define conditional_stopped_child_used_math(condition, child) \ |
1804 | do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) | 1804 | do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) |
1805 | #define conditional_used_math(condition) \ | 1805 | #define conditional_used_math(condition) \ |
1806 | conditional_stopped_child_used_math(condition, current) | 1806 | conditional_stopped_child_used_math(condition, current) |
1807 | #define copy_to_stopped_child_used_math(child) \ | 1807 | #define copy_to_stopped_child_used_math(child) \ |
1808 | do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) | 1808 | do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) |
1809 | /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ | 1809 | /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ |
1810 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) | 1810 | #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) |
1811 | #define used_math() tsk_used_math(current) | 1811 | #define used_math() tsk_used_math(current) |
1812 | 1812 | ||
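A minimal sketch of the rule described above (the fork-time helper is hypothetical): only the parent or ptracer of a stopped child may write the child's PF_USED_MATH bit, while read-only checks are always safe.

#include <linux/sched.h>

/* Hypothetical fork-time helper: the child is not running yet, so the
 * parent may copy its own PF_USED_MATH bit into child->flags. */
static int child_fpu_needs_init(struct task_struct *child)
{
	copy_to_stopped_child_used_math(child);	/* write: parent -> stopped child */
	return !tsk_used_math(child);		/* read-only check, always allowed */
}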
1813 | /* | 1813 | /* |
1814 | * task->jobctl flags | 1814 | * task->jobctl flags |
1815 | */ | 1815 | */ |
1816 | #define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ | 1816 | #define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ |
1817 | 1817 | ||
1818 | #define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ | 1818 | #define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ |
1819 | #define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ | 1819 | #define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ |
1820 | #define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ | 1820 | #define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ |
1821 | #define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ | 1821 | #define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ |
1822 | #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ | 1822 | #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ |
1823 | #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ | 1823 | #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ |
1824 | #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ | 1824 | #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ |
1825 | 1825 | ||
1826 | #define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) | 1826 | #define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) |
1827 | #define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) | 1827 | #define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) |
1828 | #define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) | 1828 | #define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) |
1829 | #define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) | 1829 | #define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) |
1830 | #define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) | 1830 | #define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) |
1831 | #define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) | 1831 | #define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) |
1832 | #define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) | 1832 | #define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) |
1833 | 1833 | ||
1834 | #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) | 1834 | #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) |
1835 | #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) | 1835 | #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) |
1836 | 1836 | ||
1837 | extern bool task_set_jobctl_pending(struct task_struct *task, | 1837 | extern bool task_set_jobctl_pending(struct task_struct *task, |
1838 | unsigned int mask); | 1838 | unsigned int mask); |
1839 | extern void task_clear_jobctl_trapping(struct task_struct *task); | 1839 | extern void task_clear_jobctl_trapping(struct task_struct *task); |
1840 | extern void task_clear_jobctl_pending(struct task_struct *task, | 1840 | extern void task_clear_jobctl_pending(struct task_struct *task, |
1841 | unsigned int mask); | 1841 | unsigned int mask); |
1842 | 1842 | ||
1843 | #ifdef CONFIG_PREEMPT_RCU | 1843 | #ifdef CONFIG_PREEMPT_RCU |
1844 | 1844 | ||
1845 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ | 1845 | #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ |
1846 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ | 1846 | #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ |
1847 | 1847 | ||
1848 | static inline void rcu_copy_process(struct task_struct *p) | 1848 | static inline void rcu_copy_process(struct task_struct *p) |
1849 | { | 1849 | { |
1850 | p->rcu_read_lock_nesting = 0; | 1850 | p->rcu_read_lock_nesting = 0; |
1851 | p->rcu_read_unlock_special = 0; | 1851 | p->rcu_read_unlock_special = 0; |
1852 | #ifdef CONFIG_TREE_PREEMPT_RCU | 1852 | #ifdef CONFIG_TREE_PREEMPT_RCU |
1853 | p->rcu_blocked_node = NULL; | 1853 | p->rcu_blocked_node = NULL; |
1854 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1854 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1855 | #ifdef CONFIG_RCU_BOOST | 1855 | #ifdef CONFIG_RCU_BOOST |
1856 | p->rcu_boost_mutex = NULL; | 1856 | p->rcu_boost_mutex = NULL; |
1857 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 1857 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
1858 | INIT_LIST_HEAD(&p->rcu_node_entry); | 1858 | INIT_LIST_HEAD(&p->rcu_node_entry); |
1859 | } | 1859 | } |
1860 | 1860 | ||
1861 | #else | 1861 | #else |
1862 | 1862 | ||
1863 | static inline void rcu_copy_process(struct task_struct *p) | 1863 | static inline void rcu_copy_process(struct task_struct *p) |
1864 | { | 1864 | { |
1865 | } | 1865 | } |
1866 | 1866 | ||
1867 | #endif | 1867 | #endif |
1868 | 1868 | ||
1869 | static inline void rcu_switch(struct task_struct *prev, | 1869 | static inline void rcu_switch(struct task_struct *prev, |
1870 | struct task_struct *next) | 1870 | struct task_struct *next) |
1871 | { | 1871 | { |
1872 | #ifdef CONFIG_RCU_USER_QS | 1872 | #ifdef CONFIG_RCU_USER_QS |
1873 | rcu_user_hooks_switch(prev, next); | 1873 | rcu_user_hooks_switch(prev, next); |
1874 | #endif | 1874 | #endif |
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | static inline void tsk_restore_flags(struct task_struct *task, | 1877 | static inline void tsk_restore_flags(struct task_struct *task, |
1878 | unsigned long orig_flags, unsigned long flags) | 1878 | unsigned long orig_flags, unsigned long flags) |
1879 | { | 1879 | { |
1880 | task->flags &= ~flags; | 1880 | task->flags &= ~flags; |
1881 | task->flags |= orig_flags & flags; | 1881 | task->flags |= orig_flags & flags; |
1882 | } | 1882 | } |
1883 | 1883 | ||
1884 | #ifdef CONFIG_SMP | 1884 | #ifdef CONFIG_SMP |
1885 | extern void do_set_cpus_allowed(struct task_struct *p, | 1885 | extern void do_set_cpus_allowed(struct task_struct *p, |
1886 | const struct cpumask *new_mask); | 1886 | const struct cpumask *new_mask); |
1887 | 1887 | ||
1888 | extern int set_cpus_allowed_ptr(struct task_struct *p, | 1888 | extern int set_cpus_allowed_ptr(struct task_struct *p, |
1889 | const struct cpumask *new_mask); | 1889 | const struct cpumask *new_mask); |
1890 | #else | 1890 | #else |
1891 | static inline void do_set_cpus_allowed(struct task_struct *p, | 1891 | static inline void do_set_cpus_allowed(struct task_struct *p, |
1892 | const struct cpumask *new_mask) | 1892 | const struct cpumask *new_mask) |
1893 | { | 1893 | { |
1894 | } | 1894 | } |
1895 | static inline int set_cpus_allowed_ptr(struct task_struct *p, | 1895 | static inline int set_cpus_allowed_ptr(struct task_struct *p, |
1896 | const struct cpumask *new_mask) | 1896 | const struct cpumask *new_mask) |
1897 | { | 1897 | { |
1898 | if (!cpumask_test_cpu(0, new_mask)) | 1898 | if (!cpumask_test_cpu(0, new_mask)) |
1899 | return -EINVAL; | 1899 | return -EINVAL; |
1900 | return 0; | 1900 | return 0; |
1901 | } | 1901 | } |
1902 | #endif | 1902 | #endif |
1903 | 1903 | ||
1904 | #ifdef CONFIG_NO_HZ | 1904 | #ifdef CONFIG_NO_HZ |
1905 | void calc_load_enter_idle(void); | 1905 | void calc_load_enter_idle(void); |
1906 | void calc_load_exit_idle(void); | 1906 | void calc_load_exit_idle(void); |
1907 | #else | 1907 | #else |
1908 | static inline void calc_load_enter_idle(void) { } | 1908 | static inline void calc_load_enter_idle(void) { } |
1909 | static inline void calc_load_exit_idle(void) { } | 1909 | static inline void calc_load_exit_idle(void) { } |
1910 | #endif /* CONFIG_NO_HZ */ | 1910 | #endif /* CONFIG_NO_HZ */ |
1911 | 1911 | ||
1912 | #ifndef CONFIG_CPUMASK_OFFSTACK | 1912 | #ifndef CONFIG_CPUMASK_OFFSTACK |
1913 | static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) | 1913 | static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) |
1914 | { | 1914 | { |
1915 | return set_cpus_allowed_ptr(p, &new_mask); | 1915 | return set_cpus_allowed_ptr(p, &new_mask); |
1916 | } | 1916 | } |
1917 | #endif | 1917 | #endif |
1918 | 1918 | ||
1919 | /* | 1919 | /* |
1920 | * Do not use outside of architecture code which knows its limitations. | 1920 | * Do not use outside of architecture code which knows its limitations. |
1921 | * | 1921 | * |
1922 | * sched_clock() has no promise of monotonicity or bounded drift between | 1922 | * sched_clock() has no promise of monotonicity or bounded drift between |
1923 | * CPUs, and using it (which you should not) requires disabling IRQs. | 1923 | * CPUs, and using it (which you should not) requires disabling IRQs. |
1924 | * | 1924 | * |
1925 | * Please use one of the three interfaces below. | 1925 | * Please use one of the three interfaces below. |
1926 | */ | 1926 | */ |
1927 | extern unsigned long long notrace sched_clock(void); | 1927 | extern unsigned long long notrace sched_clock(void); |
1928 | /* | 1928 | /* |
1929 | * See the comment in kernel/sched/clock.c | 1929 | * See the comment in kernel/sched/clock.c |
1930 | */ | 1930 | */ |
1931 | extern u64 cpu_clock(int cpu); | 1931 | extern u64 cpu_clock(int cpu); |
1932 | extern u64 local_clock(void); | 1932 | extern u64 local_clock(void); |
1933 | extern u64 sched_clock_cpu(int cpu); | 1933 | extern u64 sched_clock_cpu(int cpu); |
1934 | 1934 | ||
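As a hedged example (the timing helper is hypothetical), code outside the scheduler would use one of the three interfaces above rather than raw sched_clock():

#include <linux/sched.h>

/* Hypothetical helper: nanosecond timing with local_clock(), which is
 * safe to call without disabling IRQs, unlike raw sched_clock(). */
static u64 timed_call_ns(void (*fn)(void))
{
	u64 start = local_clock();

	fn();
	return local_clock() - start;
}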
1935 | 1935 | ||
1936 | extern void sched_clock_init(void); | 1936 | extern void sched_clock_init(void); |
1937 | 1937 | ||
1938 | #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | 1938 | #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK |
1939 | static inline void sched_clock_tick(void) | 1939 | static inline void sched_clock_tick(void) |
1940 | { | 1940 | { |
1941 | } | 1941 | } |
1942 | 1942 | ||
1943 | static inline void sched_clock_idle_sleep_event(void) | 1943 | static inline void sched_clock_idle_sleep_event(void) |
1944 | { | 1944 | { |
1945 | } | 1945 | } |
1946 | 1946 | ||
1947 | static inline void sched_clock_idle_wakeup_event(u64 delta_ns) | 1947 | static inline void sched_clock_idle_wakeup_event(u64 delta_ns) |
1948 | { | 1948 | { |
1949 | } | 1949 | } |
1950 | #else | 1950 | #else |
1951 | /* | 1951 | /* |
1952 | * Architectures can set this to 1 if they have specified | 1952 | * Architectures can set this to 1 if they have specified |
1953 | * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, | 1953 | * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, |
1954 | * but then during bootup it turns out that sched_clock() | 1954 | * but then during bootup it turns out that sched_clock() |
1955 | * is reliable after all: | 1955 | * is reliable after all: |
1956 | */ | 1956 | */ |
1957 | extern int sched_clock_stable; | 1957 | extern int sched_clock_stable; |
1958 | 1958 | ||
1959 | extern void sched_clock_tick(void); | 1959 | extern void sched_clock_tick(void); |
1960 | extern void sched_clock_idle_sleep_event(void); | 1960 | extern void sched_clock_idle_sleep_event(void); |
1961 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); | 1961 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); |
1962 | #endif | 1962 | #endif |
1963 | 1963 | ||
1964 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 1964 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
1965 | /* | 1965 | /* |
1966 | * An i/f to runtime opt-in for irq time accounting based off of sched_clock. | 1966 | * An i/f to runtime opt-in for irq time accounting based off of sched_clock. |
1967 | * The reason for this explicit opt-in is to avoid a performance penalty with | 1967 | * The reason for this explicit opt-in is to avoid a performance penalty with |
1968 | * slow sched_clocks. | 1968 | * slow sched_clocks. |
1969 | */ | 1969 | */ |
1970 | extern void enable_sched_clock_irqtime(void); | 1970 | extern void enable_sched_clock_irqtime(void); |
1971 | extern void disable_sched_clock_irqtime(void); | 1971 | extern void disable_sched_clock_irqtime(void); |
1972 | #else | 1972 | #else |
1973 | static inline void enable_sched_clock_irqtime(void) {} | 1973 | static inline void enable_sched_clock_irqtime(void) {} |
1974 | static inline void disable_sched_clock_irqtime(void) {} | 1974 | static inline void disable_sched_clock_irqtime(void) {} |
1975 | #endif | 1975 | #endif |
1976 | 1976 | ||
1977 | extern unsigned long long | 1977 | extern unsigned long long |
1978 | task_sched_runtime(struct task_struct *task); | 1978 | task_sched_runtime(struct task_struct *task); |
1979 | 1979 | ||
1980 | /* sched_exec is called by processes performing an exec */ | 1980 | /* sched_exec is called by processes performing an exec */ |
1981 | #ifdef CONFIG_SMP | 1981 | #ifdef CONFIG_SMP |
1982 | extern void sched_exec(void); | 1982 | extern void sched_exec(void); |
1983 | #else | 1983 | #else |
1984 | #define sched_exec() {} | 1984 | #define sched_exec() {} |
1985 | #endif | 1985 | #endif |
1986 | 1986 | ||
1987 | extern void sched_clock_idle_sleep_event(void); | 1987 | extern void sched_clock_idle_sleep_event(void); |
1988 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); | 1988 | extern void sched_clock_idle_wakeup_event(u64 delta_ns); |
1989 | 1989 | ||
1990 | #ifdef CONFIG_HOTPLUG_CPU | 1990 | #ifdef CONFIG_HOTPLUG_CPU |
1991 | extern void idle_task_exit(void); | 1991 | extern void idle_task_exit(void); |
1992 | #else | 1992 | #else |
1993 | static inline void idle_task_exit(void) {} | 1993 | static inline void idle_task_exit(void) {} |
1994 | #endif | 1994 | #endif |
1995 | 1995 | ||
1996 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | 1996 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) |
1997 | extern void wake_up_idle_cpu(int cpu); | 1997 | extern void wake_up_idle_cpu(int cpu); |
1998 | #else | 1998 | #else |
1999 | static inline void wake_up_idle_cpu(int cpu) { } | 1999 | static inline void wake_up_idle_cpu(int cpu) { } |
2000 | #endif | 2000 | #endif |
2001 | 2001 | ||
2002 | extern unsigned int sysctl_sched_latency; | 2002 | extern unsigned int sysctl_sched_latency; |
2003 | extern unsigned int sysctl_sched_min_granularity; | 2003 | extern unsigned int sysctl_sched_min_granularity; |
2004 | extern unsigned int sysctl_sched_wakeup_granularity; | 2004 | extern unsigned int sysctl_sched_wakeup_granularity; |
2005 | extern unsigned int sysctl_sched_child_runs_first; | 2005 | extern unsigned int sysctl_sched_child_runs_first; |
2006 | 2006 | ||
2007 | enum sched_tunable_scaling { | 2007 | enum sched_tunable_scaling { |
2008 | SCHED_TUNABLESCALING_NONE, | 2008 | SCHED_TUNABLESCALING_NONE, |
2009 | SCHED_TUNABLESCALING_LOG, | 2009 | SCHED_TUNABLESCALING_LOG, |
2010 | SCHED_TUNABLESCALING_LINEAR, | 2010 | SCHED_TUNABLESCALING_LINEAR, |
2011 | SCHED_TUNABLESCALING_END, | 2011 | SCHED_TUNABLESCALING_END, |
2012 | }; | 2012 | }; |
2013 | extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; | 2013 | extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; |
2014 | 2014 | ||
2015 | #ifdef CONFIG_SCHED_DEBUG | 2015 | #ifdef CONFIG_SCHED_DEBUG |
2016 | extern unsigned int sysctl_sched_migration_cost; | 2016 | extern unsigned int sysctl_sched_migration_cost; |
2017 | extern unsigned int sysctl_sched_nr_migrate; | 2017 | extern unsigned int sysctl_sched_nr_migrate; |
2018 | extern unsigned int sysctl_sched_time_avg; | 2018 | extern unsigned int sysctl_sched_time_avg; |
2019 | extern unsigned int sysctl_timer_migration; | 2019 | extern unsigned int sysctl_timer_migration; |
2020 | extern unsigned int sysctl_sched_shares_window; | 2020 | extern unsigned int sysctl_sched_shares_window; |
2021 | 2021 | ||
2022 | int sched_proc_update_handler(struct ctl_table *table, int write, | 2022 | int sched_proc_update_handler(struct ctl_table *table, int write, |
2023 | void __user *buffer, size_t *length, | 2023 | void __user *buffer, size_t *length, |
2024 | loff_t *ppos); | 2024 | loff_t *ppos); |
2025 | #endif | 2025 | #endif |
2026 | #ifdef CONFIG_SCHED_DEBUG | 2026 | #ifdef CONFIG_SCHED_DEBUG |
2027 | static inline unsigned int get_sysctl_timer_migration(void) | 2027 | static inline unsigned int get_sysctl_timer_migration(void) |
2028 | { | 2028 | { |
2029 | return sysctl_timer_migration; | 2029 | return sysctl_timer_migration; |
2030 | } | 2030 | } |
2031 | #else | 2031 | #else |
2032 | static inline unsigned int get_sysctl_timer_migration(void) | 2032 | static inline unsigned int get_sysctl_timer_migration(void) |
2033 | { | 2033 | { |
2034 | return 1; | 2034 | return 1; |
2035 | } | 2035 | } |
2036 | #endif | 2036 | #endif |
2037 | extern unsigned int sysctl_sched_rt_period; | 2037 | extern unsigned int sysctl_sched_rt_period; |
2038 | extern int sysctl_sched_rt_runtime; | 2038 | extern int sysctl_sched_rt_runtime; |
2039 | 2039 | ||
2040 | int sched_rt_handler(struct ctl_table *table, int write, | 2040 | int sched_rt_handler(struct ctl_table *table, int write, |
2041 | void __user *buffer, size_t *lenp, | 2041 | void __user *buffer, size_t *lenp, |
2042 | loff_t *ppos); | 2042 | loff_t *ppos); |
2043 | 2043 | ||
2044 | #ifdef CONFIG_SCHED_AUTOGROUP | 2044 | #ifdef CONFIG_SCHED_AUTOGROUP |
2045 | extern unsigned int sysctl_sched_autogroup_enabled; | 2045 | extern unsigned int sysctl_sched_autogroup_enabled; |
2046 | 2046 | ||
2047 | extern void sched_autogroup_create_attach(struct task_struct *p); | 2047 | extern void sched_autogroup_create_attach(struct task_struct *p); |
2048 | extern void sched_autogroup_detach(struct task_struct *p); | 2048 | extern void sched_autogroup_detach(struct task_struct *p); |
2049 | extern void sched_autogroup_fork(struct signal_struct *sig); | 2049 | extern void sched_autogroup_fork(struct signal_struct *sig); |
2050 | extern void sched_autogroup_exit(struct signal_struct *sig); | 2050 | extern void sched_autogroup_exit(struct signal_struct *sig); |
2051 | #ifdef CONFIG_PROC_FS | 2051 | #ifdef CONFIG_PROC_FS |
2052 | extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); | 2052 | extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); |
2053 | extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); | 2053 | extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); |
2054 | #endif | 2054 | #endif |
2055 | #else | 2055 | #else |
2056 | static inline void sched_autogroup_create_attach(struct task_struct *p) { } | 2056 | static inline void sched_autogroup_create_attach(struct task_struct *p) { } |
2057 | static inline void sched_autogroup_detach(struct task_struct *p) { } | 2057 | static inline void sched_autogroup_detach(struct task_struct *p) { } |
2058 | static inline void sched_autogroup_fork(struct signal_struct *sig) { } | 2058 | static inline void sched_autogroup_fork(struct signal_struct *sig) { } |
2059 | static inline void sched_autogroup_exit(struct signal_struct *sig) { } | 2059 | static inline void sched_autogroup_exit(struct signal_struct *sig) { } |
2060 | #endif | 2060 | #endif |
2061 | 2061 | ||
2062 | #ifdef CONFIG_CFS_BANDWIDTH | 2062 | #ifdef CONFIG_CFS_BANDWIDTH |
2063 | extern unsigned int sysctl_sched_cfs_bandwidth_slice; | 2063 | extern unsigned int sysctl_sched_cfs_bandwidth_slice; |
2064 | #endif | 2064 | #endif |
2065 | 2065 | ||
2066 | #ifdef CONFIG_RT_MUTEXES | 2066 | #ifdef CONFIG_RT_MUTEXES |
2067 | extern int rt_mutex_getprio(struct task_struct *p); | 2067 | extern int rt_mutex_getprio(struct task_struct *p); |
2068 | extern void rt_mutex_setprio(struct task_struct *p, int prio); | 2068 | extern void rt_mutex_setprio(struct task_struct *p, int prio); |
2069 | extern void rt_mutex_adjust_pi(struct task_struct *p); | 2069 | extern void rt_mutex_adjust_pi(struct task_struct *p); |
2070 | static inline bool tsk_is_pi_blocked(struct task_struct *tsk) | 2070 | static inline bool tsk_is_pi_blocked(struct task_struct *tsk) |
2071 | { | 2071 | { |
2072 | return tsk->pi_blocked_on != NULL; | 2072 | return tsk->pi_blocked_on != NULL; |
2073 | } | 2073 | } |
2074 | #else | 2074 | #else |
2075 | static inline int rt_mutex_getprio(struct task_struct *p) | 2075 | static inline int rt_mutex_getprio(struct task_struct *p) |
2076 | { | 2076 | { |
2077 | return p->normal_prio; | 2077 | return p->normal_prio; |
2078 | } | 2078 | } |
2079 | # define rt_mutex_adjust_pi(p) do { } while (0) | 2079 | # define rt_mutex_adjust_pi(p) do { } while (0) |
2080 | static inline bool tsk_is_pi_blocked(struct task_struct *tsk) | 2080 | static inline bool tsk_is_pi_blocked(struct task_struct *tsk) |
2081 | { | 2081 | { |
2082 | return false; | 2082 | return false; |
2083 | } | 2083 | } |
2084 | #endif | 2084 | #endif |
2085 | 2085 | ||
2086 | extern bool yield_to(struct task_struct *p, bool preempt); | 2086 | extern bool yield_to(struct task_struct *p, bool preempt); |
2087 | extern void set_user_nice(struct task_struct *p, long nice); | 2087 | extern void set_user_nice(struct task_struct *p, long nice); |
2088 | extern int task_prio(const struct task_struct *p); | 2088 | extern int task_prio(const struct task_struct *p); |
2089 | extern int task_nice(const struct task_struct *p); | 2089 | extern int task_nice(const struct task_struct *p); |
2090 | extern int can_nice(const struct task_struct *p, const int nice); | 2090 | extern int can_nice(const struct task_struct *p, const int nice); |
2091 | extern int task_curr(const struct task_struct *p); | 2091 | extern int task_curr(const struct task_struct *p); |
2092 | extern int idle_cpu(int cpu); | 2092 | extern int idle_cpu(int cpu); |
2093 | extern int sched_setscheduler(struct task_struct *, int, | 2093 | extern int sched_setscheduler(struct task_struct *, int, |
2094 | const struct sched_param *); | 2094 | const struct sched_param *); |
2095 | extern int sched_setscheduler_nocheck(struct task_struct *, int, | 2095 | extern int sched_setscheduler_nocheck(struct task_struct *, int, |
2096 | const struct sched_param *); | 2096 | const struct sched_param *); |
2097 | extern struct task_struct *idle_task(int cpu); | 2097 | extern struct task_struct *idle_task(int cpu); |
2098 | /** | 2098 | /** |
2099 | * is_idle_task - is the specified task an idle task? | 2099 | * is_idle_task - is the specified task an idle task? |
2100 | * @p: the task in question. | 2100 | * @p: the task in question. |
2101 | */ | 2101 | */ |
2102 | static inline bool is_idle_task(const struct task_struct *p) | 2102 | static inline bool is_idle_task(const struct task_struct *p) |
2103 | { | 2103 | { |
2104 | return p->pid == 0; | 2104 | return p->pid == 0; |
2105 | } | 2105 | } |
2106 | extern struct task_struct *curr_task(int cpu); | 2106 | extern struct task_struct *curr_task(int cpu); |
2107 | extern void set_curr_task(int cpu, struct task_struct *p); | 2107 | extern void set_curr_task(int cpu, struct task_struct *p); |
2108 | 2108 | ||
2109 | void yield(void); | 2109 | void yield(void); |
2110 | 2110 | ||
2111 | /* | 2111 | /* |
2112 | * The default (Linux) execution domain. | 2112 | * The default (Linux) execution domain. |
2113 | */ | 2113 | */ |
2114 | extern struct exec_domain default_exec_domain; | 2114 | extern struct exec_domain default_exec_domain; |
2115 | 2115 | ||
2116 | union thread_union { | 2116 | union thread_union { |
2117 | struct thread_info thread_info; | 2117 | struct thread_info thread_info; |
2118 | unsigned long stack[THREAD_SIZE/sizeof(long)]; | 2118 | unsigned long stack[THREAD_SIZE/sizeof(long)]; |
2119 | }; | 2119 | }; |
2120 | 2120 | ||
2121 | #ifndef __HAVE_ARCH_KSTACK_END | 2121 | #ifndef __HAVE_ARCH_KSTACK_END |
2122 | static inline int kstack_end(void *addr) | 2122 | static inline int kstack_end(void *addr) |
2123 | { | 2123 | { |
2124 | /* Reliable end of stack detection: | 2124 | /* Reliable end of stack detection: |
2125 | * Some APM bios versions misalign the stack | 2125 | * Some APM bios versions misalign the stack |
2126 | */ | 2126 | */ |
2127 | return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); | 2127 | return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); |
2128 | } | 2128 | } |
2129 | #endif | 2129 | #endif |
2130 | 2130 | ||
2131 | extern union thread_union init_thread_union; | 2131 | extern union thread_union init_thread_union; |
2132 | extern struct task_struct init_task; | 2132 | extern struct task_struct init_task; |
2133 | 2133 | ||
2134 | extern struct mm_struct init_mm; | 2134 | extern struct mm_struct init_mm; |
2135 | 2135 | ||
2136 | extern struct pid_namespace init_pid_ns; | 2136 | extern struct pid_namespace init_pid_ns; |
2137 | 2137 | ||
2138 | /* | 2138 | /* |
2139 | * find a task by one of its numerical ids | 2139 | * find a task by one of its numerical ids |
2140 | * | 2140 | * |
2141 | * find_task_by_pid_ns(): | 2141 | * find_task_by_pid_ns(): |
2142 | * finds a task by its pid in the specified namespace | 2142 | * finds a task by its pid in the specified namespace |
2143 | * find_task_by_vpid(): | 2143 | * find_task_by_vpid(): |
2144 | * finds a task by its virtual pid | 2144 | * finds a task by its virtual pid |
2145 | * | 2145 | * |
2146 | * see also find_vpid() etc in include/linux/pid.h | 2146 | * see also find_vpid() etc in include/linux/pid.h |
2147 | */ | 2147 | */ |
2148 | 2148 | ||
2149 | extern struct task_struct *find_task_by_vpid(pid_t nr); | 2149 | extern struct task_struct *find_task_by_vpid(pid_t nr); |
2150 | extern struct task_struct *find_task_by_pid_ns(pid_t nr, | 2150 | extern struct task_struct *find_task_by_pid_ns(pid_t nr, |
2151 | struct pid_namespace *ns); | 2151 | struct pid_namespace *ns); |
2152 | 2152 | ||
2153 | extern void __set_special_pids(struct pid *pid); | 2153 | extern void __set_special_pids(struct pid *pid); |
2154 | 2154 | ||
2155 | /* per-UID process charging. */ | 2155 | /* per-UID process charging. */ |
2156 | extern struct user_struct * alloc_uid(kuid_t); | 2156 | extern struct user_struct * alloc_uid(kuid_t); |
2157 | static inline struct user_struct *get_uid(struct user_struct *u) | 2157 | static inline struct user_struct *get_uid(struct user_struct *u) |
2158 | { | 2158 | { |
2159 | atomic_inc(&u->__count); | 2159 | atomic_inc(&u->__count); |
2160 | return u; | 2160 | return u; |
2161 | } | 2161 | } |
2162 | extern void free_uid(struct user_struct *); | 2162 | extern void free_uid(struct user_struct *); |
2163 | 2163 | ||
2164 | #include <asm/current.h> | 2164 | #include <asm/current.h> |
2165 | 2165 | ||
2166 | extern void xtime_update(unsigned long ticks); | 2166 | extern void xtime_update(unsigned long ticks); |
2167 | 2167 | ||
2168 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | 2168 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); |
2169 | extern int wake_up_process(struct task_struct *tsk); | 2169 | extern int wake_up_process(struct task_struct *tsk); |
2170 | extern void wake_up_new_task(struct task_struct *tsk); | 2170 | extern void wake_up_new_task(struct task_struct *tsk); |
2171 | #ifdef CONFIG_SMP | 2171 | #ifdef CONFIG_SMP |
2172 | extern void kick_process(struct task_struct *tsk); | 2172 | extern void kick_process(struct task_struct *tsk); |
2173 | #else | 2173 | #else |
2174 | static inline void kick_process(struct task_struct *tsk) { } | 2174 | static inline void kick_process(struct task_struct *tsk) { } |
2175 | #endif | 2175 | #endif |
2176 | extern void sched_fork(struct task_struct *p); | 2176 | extern void sched_fork(struct task_struct *p); |
2177 | extern void sched_dead(struct task_struct *p); | 2177 | extern void sched_dead(struct task_struct *p); |
2178 | 2178 | ||
2179 | extern void proc_caches_init(void); | 2179 | extern void proc_caches_init(void); |
2180 | extern void flush_signals(struct task_struct *); | 2180 | extern void flush_signals(struct task_struct *); |
2181 | extern void __flush_signals(struct task_struct *); | 2181 | extern void __flush_signals(struct task_struct *); |
2182 | extern void ignore_signals(struct task_struct *); | 2182 | extern void ignore_signals(struct task_struct *); |
2183 | extern void flush_signal_handlers(struct task_struct *, int force_default); | 2183 | extern void flush_signal_handlers(struct task_struct *, int force_default); |
2184 | extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); | 2184 | extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); |
2185 | 2185 | ||
2186 | static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | 2186 | static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) |
2187 | { | 2187 | { |
2188 | unsigned long flags; | 2188 | unsigned long flags; |
2189 | int ret; | 2189 | int ret; |
2190 | 2190 | ||
2191 | spin_lock_irqsave(&tsk->sighand->siglock, flags); | 2191 | spin_lock_irqsave(&tsk->sighand->siglock, flags); |
2192 | ret = dequeue_signal(tsk, mask, info); | 2192 | ret = dequeue_signal(tsk, mask, info); |
2193 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); | 2193 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); |
2194 | 2194 | ||
2195 | return ret; | 2195 | return ret; |
2196 | } | 2196 | } |
2197 | 2197 | ||
2198 | extern void block_all_signals(int (*notifier)(void *priv), void *priv, | 2198 | extern void block_all_signals(int (*notifier)(void *priv), void *priv, |
2199 | sigset_t *mask); | 2199 | sigset_t *mask); |
2200 | extern void unblock_all_signals(void); | 2200 | extern void unblock_all_signals(void); |
2201 | extern void release_task(struct task_struct * p); | 2201 | extern void release_task(struct task_struct * p); |
2202 | extern int send_sig_info(int, struct siginfo *, struct task_struct *); | 2202 | extern int send_sig_info(int, struct siginfo *, struct task_struct *); |
2203 | extern int force_sigsegv(int, struct task_struct *); | 2203 | extern int force_sigsegv(int, struct task_struct *); |
2204 | extern int force_sig_info(int, struct siginfo *, struct task_struct *); | 2204 | extern int force_sig_info(int, struct siginfo *, struct task_struct *); |
2205 | extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); | 2205 | extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); |
2206 | extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); | 2206 | extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); |
2207 | extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, | 2207 | extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, |
2208 | const struct cred *, u32); | 2208 | const struct cred *, u32); |
2209 | extern int kill_pgrp(struct pid *pid, int sig, int priv); | 2209 | extern int kill_pgrp(struct pid *pid, int sig, int priv); |
2210 | extern int kill_pid(struct pid *pid, int sig, int priv); | 2210 | extern int kill_pid(struct pid *pid, int sig, int priv); |
2211 | extern int kill_proc_info(int, struct siginfo *, pid_t); | 2211 | extern int kill_proc_info(int, struct siginfo *, pid_t); |
2212 | extern __must_check bool do_notify_parent(struct task_struct *, int); | 2212 | extern __must_check bool do_notify_parent(struct task_struct *, int); |
2213 | extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); | 2213 | extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); |
2214 | extern void force_sig(int, struct task_struct *); | 2214 | extern void force_sig(int, struct task_struct *); |
2215 | extern int send_sig(int, struct task_struct *, int); | 2215 | extern int send_sig(int, struct task_struct *, int); |
2216 | extern int zap_other_threads(struct task_struct *p); | 2216 | extern int zap_other_threads(struct task_struct *p); |
2217 | extern struct sigqueue *sigqueue_alloc(void); | 2217 | extern struct sigqueue *sigqueue_alloc(void); |
2218 | extern void sigqueue_free(struct sigqueue *); | 2218 | extern void sigqueue_free(struct sigqueue *); |
2219 | extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); | 2219 | extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); |
2220 | extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); | 2220 | extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); |
2221 | extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); | 2221 | extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); |
2222 | 2222 | ||
2223 | static inline void restore_saved_sigmask(void) | 2223 | static inline void restore_saved_sigmask(void) |
2224 | { | 2224 | { |
2225 | if (test_and_clear_restore_sigmask()) | 2225 | if (test_and_clear_restore_sigmask()) |
2226 | __set_current_blocked(¤t->saved_sigmask); | 2226 | __set_current_blocked(¤t->saved_sigmask); |
2227 | } | 2227 | } |
2228 | 2228 | ||
2229 | static inline sigset_t *sigmask_to_save(void) | 2229 | static inline sigset_t *sigmask_to_save(void) |
2230 | { | 2230 | { |
2231 | sigset_t *res = ¤t->blocked; | 2231 | sigset_t *res = ¤t->blocked; |
2232 | if (unlikely(test_restore_sigmask())) | 2232 | if (unlikely(test_restore_sigmask())) |
2233 | res = ¤t->saved_sigmask; | 2233 | res = ¤t->saved_sigmask; |
2234 | return res; | 2234 | return res; |
2235 | } | 2235 | } |
2236 | 2236 | ||
2237 | static inline int kill_cad_pid(int sig, int priv) | 2237 | static inline int kill_cad_pid(int sig, int priv) |
2238 | { | 2238 | { |
2239 | return kill_pid(cad_pid, sig, priv); | 2239 | return kill_pid(cad_pid, sig, priv); |
2240 | } | 2240 | } |
2241 | 2241 | ||
2242 | /* These can be the second arg to send_sig_info/send_group_sig_info. */ | 2242 | /* These can be the second arg to send_sig_info/send_group_sig_info. */ |
2243 | #define SEND_SIG_NOINFO ((struct siginfo *) 0) | 2243 | #define SEND_SIG_NOINFO ((struct siginfo *) 0) |
2244 | #define SEND_SIG_PRIV ((struct siginfo *) 1) | 2244 | #define SEND_SIG_PRIV ((struct siginfo *) 1) |
2245 | #define SEND_SIG_FORCED ((struct siginfo *) 2) | 2245 | #define SEND_SIG_FORCED ((struct siginfo *) 2) |
2246 | 2246 | ||
2247 | /* | 2247 | /* |
2248 | * True if we are on the alternate signal stack. | 2248 | * True if we are on the alternate signal stack. |
2249 | */ | 2249 | */ |
2250 | static inline int on_sig_stack(unsigned long sp) | 2250 | static inline int on_sig_stack(unsigned long sp) |
2251 | { | 2251 | { |
2252 | #ifdef CONFIG_STACK_GROWSUP | 2252 | #ifdef CONFIG_STACK_GROWSUP |
2253 | return sp >= current->sas_ss_sp && | 2253 | return sp >= current->sas_ss_sp && |
2254 | sp - current->sas_ss_sp < current->sas_ss_size; | 2254 | sp - current->sas_ss_sp < current->sas_ss_size; |
2255 | #else | 2255 | #else |
2256 | return sp > current->sas_ss_sp && | 2256 | return sp > current->sas_ss_sp && |
2257 | sp - current->sas_ss_sp <= current->sas_ss_size; | 2257 | sp - current->sas_ss_sp <= current->sas_ss_size; |
2258 | #endif | 2258 | #endif |
2259 | } | 2259 | } |
2260 | 2260 | ||
2261 | static inline int sas_ss_flags(unsigned long sp) | 2261 | static inline int sas_ss_flags(unsigned long sp) |
2262 | { | 2262 | { |
2263 | return (current->sas_ss_size == 0 ? SS_DISABLE | 2263 | return (current->sas_ss_size == 0 ? SS_DISABLE |
2264 | : on_sig_stack(sp) ? SS_ONSTACK : 0); | 2264 | : on_sig_stack(sp) ? SS_ONSTACK : 0); |
2265 | } | 2265 | } |
2266 | 2266 | ||
2267 | /* | 2267 | /* |
2268 | * Routines for handling mm_structs | 2268 | * Routines for handling mm_structs |
2269 | */ | 2269 | */ |
2270 | extern struct mm_struct * mm_alloc(void); | 2270 | extern struct mm_struct * mm_alloc(void); |
2271 | 2271 | ||
2272 | /* mmdrop drops the mm and the page tables */ | 2272 | /* mmdrop drops the mm and the page tables */ |
2273 | extern void __mmdrop(struct mm_struct *); | 2273 | extern void __mmdrop(struct mm_struct *); |
2274 | static inline void mmdrop(struct mm_struct * mm) | 2274 | static inline void mmdrop(struct mm_struct * mm) |
2275 | { | 2275 | { |
2276 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) | 2276 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) |
2277 | __mmdrop(mm); | 2277 | __mmdrop(mm); |
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | /* mmput gets rid of the mappings and all user-space */ | 2280 | /* mmput gets rid of the mappings and all user-space */ |
2281 | extern void mmput(struct mm_struct *); | 2281 | extern void mmput(struct mm_struct *); |
2282 | /* Grab a reference to a task's mm, if it is not already going away */ | 2282 | /* Grab a reference to a task's mm, if it is not already going away */ |
2283 | extern struct mm_struct *get_task_mm(struct task_struct *task); | 2283 | extern struct mm_struct *get_task_mm(struct task_struct *task); |
2284 | /* | 2284 | /* |
2285 | * Grab a reference to a task's mm, if it is not already going away | 2285 | * Grab a reference to a task's mm, if it is not already going away |
2286 | * and ptrace_may_access with the mode parameter passed to it | 2286 | * and ptrace_may_access with the mode parameter passed to it |
2287 | * succeeds. | 2287 | * succeeds. |
2288 | */ | 2288 | */ |
2289 | extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); | 2289 | extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); |
2290 | /* Remove the current task's stale references to the old mm_struct */ | 2290 | /* Remove the current task's stale references to the old mm_struct */ |
2291 | extern void mm_release(struct task_struct *, struct mm_struct *); | 2291 | extern void mm_release(struct task_struct *, struct mm_struct *); |
2292 | /* Allocate a new mm structure and copy contents from tsk->mm */ | 2292 | /* Allocate a new mm structure and copy contents from tsk->mm */ |
2293 | extern struct mm_struct *dup_mm(struct task_struct *tsk); | 2293 | extern struct mm_struct *dup_mm(struct task_struct *tsk); |
2294 | 2294 | ||
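An illustrative consumer of the mm helpers above (the statistics function is hypothetical):

#include <linux/sched.h>
#include <linux/mm_types.h>

/* Hypothetical sketch: borrow another task's mm. get_task_mm() returns
 * NULL for kernel threads or tasks already past exit_mm(); mmput()
 * drops the reference taken here. */
static unsigned long task_total_vm_pages(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);
	unsigned long pages = 0;

	if (mm) {
		pages = mm->total_vm;
		mmput(mm);
	}
	return pages;
}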
2295 | extern int copy_thread(unsigned long, unsigned long, unsigned long, | 2295 | extern int copy_thread(unsigned long, unsigned long, unsigned long, |
2296 | struct task_struct *, struct pt_regs *); | 2296 | struct task_struct *, struct pt_regs *); |
2297 | extern void flush_thread(void); | 2297 | extern void flush_thread(void); |
2298 | extern void exit_thread(void); | 2298 | extern void exit_thread(void); |
2299 | 2299 | ||
2300 | extern void exit_files(struct task_struct *); | 2300 | extern void exit_files(struct task_struct *); |
2301 | extern void __cleanup_sighand(struct sighand_struct *); | 2301 | extern void __cleanup_sighand(struct sighand_struct *); |
2302 | 2302 | ||
2303 | extern void exit_itimers(struct signal_struct *); | 2303 | extern void exit_itimers(struct signal_struct *); |
2304 | extern void flush_itimer_signals(void); | 2304 | extern void flush_itimer_signals(void); |
2305 | 2305 | ||
2306 | extern void do_group_exit(int); | 2306 | extern void do_group_exit(int); |
2307 | 2307 | ||
2308 | extern void daemonize(const char *, ...); | 2308 | extern void daemonize(const char *, ...); |
2309 | extern int allow_signal(int); | 2309 | extern int allow_signal(int); |
2310 | extern int disallow_signal(int); | 2310 | extern int disallow_signal(int); |
2311 | 2311 | ||
2312 | extern int do_execve(const char *, | 2312 | extern int do_execve(const char *, |
2313 | const char __user * const __user *, | 2313 | const char __user * const __user *, |
2314 | const char __user * const __user *, struct pt_regs *); | 2314 | const char __user * const __user *, struct pt_regs *); |
2315 | extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); | 2315 | extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); |
2316 | struct task_struct *fork_idle(int); | 2316 | struct task_struct *fork_idle(int); |
2317 | #ifdef CONFIG_GENERIC_KERNEL_THREAD | 2317 | #ifdef CONFIG_GENERIC_KERNEL_THREAD |
2318 | extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); | 2318 | extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); |
2319 | #endif | 2319 | #endif |
2320 | 2320 | ||
2321 | extern void set_task_comm(struct task_struct *tsk, char *from); | 2321 | extern void set_task_comm(struct task_struct *tsk, char *from); |
2322 | extern char *get_task_comm(char *to, struct task_struct *tsk); | 2322 | extern char *get_task_comm(char *to, struct task_struct *tsk); |
2323 | 2323 | ||
2324 | #ifdef CONFIG_SMP | 2324 | #ifdef CONFIG_SMP |
2325 | void scheduler_ipi(void); | 2325 | void scheduler_ipi(void); |
2326 | extern unsigned long wait_task_inactive(struct task_struct *, long match_state); | 2326 | extern unsigned long wait_task_inactive(struct task_struct *, long match_state); |
2327 | #else | 2327 | #else |
2328 | static inline void scheduler_ipi(void) { } | 2328 | static inline void scheduler_ipi(void) { } |
2329 | static inline unsigned long wait_task_inactive(struct task_struct *p, | 2329 | static inline unsigned long wait_task_inactive(struct task_struct *p, |
2330 | long match_state) | 2330 | long match_state) |
2331 | { | 2331 | { |
2332 | return 1; | 2332 | return 1; |
2333 | } | 2333 | } |
2334 | #endif | 2334 | #endif |
2335 | 2335 | ||
2336 | #define next_task(p) \ | 2336 | #define next_task(p) \ |
2337 | list_entry_rcu((p)->tasks.next, struct task_struct, tasks) | 2337 | list_entry_rcu((p)->tasks.next, struct task_struct, tasks) |
2338 | 2338 | ||
2339 | #define for_each_process(p) \ | 2339 | #define for_each_process(p) \ |
2340 | for (p = &init_task ; (p = next_task(p)) != &init_task ; ) | 2340 | for (p = &init_task ; (p = next_task(p)) != &init_task ; ) |
2341 | 2341 | ||
2342 | extern bool current_is_single_threaded(void); | 2342 | extern bool current_is_single_threaded(void); |
2343 | 2343 | ||
2344 | /* | 2344 | /* |
2345 | * Careful: do_each_thread/while_each_thread is a double loop so | 2345 | * Careful: do_each_thread/while_each_thread is a double loop so |
2346 | * 'break' will not work as expected - use goto instead. | 2346 | * 'break' will not work as expected - use goto instead. |
2347 | */ | 2347 | */ |
2348 | #define do_each_thread(g, t) \ | 2348 | #define do_each_thread(g, t) \ |
2349 | for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do | 2349 | for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do |
2350 | 2350 | ||
2351 | #define while_each_thread(g, t) \ | 2351 | #define while_each_thread(g, t) \ |
2352 | while ((t = next_thread(t)) != g) | 2352 | while ((t = next_thread(t)) != g) |
2353 | 2353 | ||
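A sketch of the iteration pattern (the lookup helper is hypothetical); note the goto, since a plain break would only leave the inner loop:

#include <linux/sched.h>

/* Hypothetical lookup over every thread of every process. */
static struct task_struct *find_any_thread(pid_t nr)
{
	struct task_struct *g, *t, *found = NULL;

	read_lock(&tasklist_lock);
	do_each_thread(g, t) {
		if (t->pid == nr) {
			found = t;
			goto out;	/* 'break' would not exit both loops */
		}
	} while_each_thread(g, t);
out:
	read_unlock(&tasklist_lock);
	return found;
}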
2354 | static inline int get_nr_threads(struct task_struct *tsk) | 2354 | static inline int get_nr_threads(struct task_struct *tsk) |
2355 | { | 2355 | { |
2356 | return tsk->signal->nr_threads; | 2356 | return tsk->signal->nr_threads; |
2357 | } | 2357 | } |
2358 | 2358 | ||
2359 | static inline bool thread_group_leader(struct task_struct *p) | 2359 | static inline bool thread_group_leader(struct task_struct *p) |
2360 | { | 2360 | { |
2361 | return p->exit_signal >= 0; | 2361 | return p->exit_signal >= 0; |
2362 | } | 2362 | } |
2363 | 2363 | ||
2364 | /* Due to the insanities of de_thread it is possible for a process | 2364 | /* Due to the insanities of de_thread it is possible for a process |
2365 | * to have the pid of the thread group leader without actually being | 2365 | * to have the pid of the thread group leader without actually being |
2366 | * the thread group leader. For iteration through the pids in proc | 2366 | * the thread group leader. For iteration through the pids in proc |
2367 | * all we care about is that we have a task with the appropriate | 2367 | * all we care about is that we have a task with the appropriate |
2368 | * pid; we don't actually care if we have the right task. | 2368 | * pid; we don't actually care if we have the right task. |
2369 | */ | 2369 | */ |
2370 | static inline int has_group_leader_pid(struct task_struct *p) | 2370 | static inline int has_group_leader_pid(struct task_struct *p) |
2371 | { | 2371 | { |
2372 | return p->pid == p->tgid; | 2372 | return p->pid == p->tgid; |
2373 | } | 2373 | } |
2374 | 2374 | ||
2375 | static inline | 2375 | static inline |
2376 | int same_thread_group(struct task_struct *p1, struct task_struct *p2) | 2376 | int same_thread_group(struct task_struct *p1, struct task_struct *p2) |
2377 | { | 2377 | { |
2378 | return p1->tgid == p2->tgid; | 2378 | return p1->tgid == p2->tgid; |
2379 | } | 2379 | } |
2380 | 2380 | ||
2381 | static inline struct task_struct *next_thread(const struct task_struct *p) | 2381 | static inline struct task_struct *next_thread(const struct task_struct *p) |
2382 | { | 2382 | { |
2383 | return list_entry_rcu(p->thread_group.next, | 2383 | return list_entry_rcu(p->thread_group.next, |
2384 | struct task_struct, thread_group); | 2384 | struct task_struct, thread_group); |
2385 | } | 2385 | } |
2386 | 2386 | ||
2387 | static inline int thread_group_empty(struct task_struct *p) | 2387 | static inline int thread_group_empty(struct task_struct *p) |
2388 | { | 2388 | { |
2389 | return list_empty(&p->thread_group); | 2389 | return list_empty(&p->thread_group); |
2390 | } | 2390 | } |
2391 | 2391 | ||
2392 | #define delay_group_leader(p) \ | 2392 | #define delay_group_leader(p) \ |
2393 | (thread_group_leader(p) && !thread_group_empty(p)) | 2393 | (thread_group_leader(p) && !thread_group_empty(p)) |
2394 | 2394 | ||
2395 | /* | 2395 | /* |
2396 | * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring | 2396 | * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring |
2397 | * subscriptions and synchronises with wait4(). Also used in procfs. Also | 2397 | * subscriptions and synchronises with wait4(). Also used in procfs. Also |
2398 | * pins the final release of task.io_context. Also protects ->cpuset and | 2398 | * pins the final release of task.io_context. Also protects ->cpuset and |
2399 | * ->cgroup.subsys[]. And ->vfork_done. | 2399 | * ->cgroup.subsys[]. And ->vfork_done. |
2400 | * | 2400 | * |
2401 | * Nests both inside and outside of read_lock(&tasklist_lock). | 2401 | * Nests both inside and outside of read_lock(&tasklist_lock). |
2402 | * It must not be nested with write_lock_irq(&tasklist_lock), | 2402 | * It must not be nested with write_lock_irq(&tasklist_lock), |
2403 | * neither inside nor outside. | 2403 | * neither inside nor outside. |
2404 | */ | 2404 | */ |
2405 | static inline void task_lock(struct task_struct *p) | 2405 | static inline void task_lock(struct task_struct *p) |
2406 | { | 2406 | { |
2407 | spin_lock(&p->alloc_lock); | 2407 | spin_lock(&p->alloc_lock); |
2408 | } | 2408 | } |
2409 | 2409 | ||
2410 | static inline void task_unlock(struct task_struct *p) | 2410 | static inline void task_unlock(struct task_struct *p) |
2411 | { | 2411 | { |
2412 | spin_unlock(&p->alloc_lock); | 2412 | spin_unlock(&p->alloc_lock); |
2413 | } | 2413 | } |
2414 | 2414 | ||
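task_lock()/task_unlock() just take p->alloc_lock, so they are cheap and non-sleeping; the comment above lists the fields they stabilise. A hedged sketch of reading another task's ->comm under it (names are illustrative, and in-tree code would normally use a dedicated helper for this):

        char comm[TASK_COMM_LEN];

        task_lock(p);
        strlcpy(comm, p->comm, sizeof(comm));   /* ->comm is stable here */
        task_unlock(p);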
2415 | extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | 2415 | extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, |
2416 | unsigned long *flags); | 2416 | unsigned long *flags); |
2417 | 2417 | ||
2418 | static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, | 2418 | static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, |
2419 | unsigned long *flags) | 2419 | unsigned long *flags) |
2420 | { | 2420 | { |
2421 | struct sighand_struct *ret; | 2421 | struct sighand_struct *ret; |
2422 | 2422 | ||
2423 | ret = __lock_task_sighand(tsk, flags); | 2423 | ret = __lock_task_sighand(tsk, flags); |
2424 | (void)__cond_lock(&tsk->sighand->siglock, ret); | 2424 | (void)__cond_lock(&tsk->sighand->siglock, ret); |
2425 | return ret; | 2425 | return ret; |
2426 | } | 2426 | } |
2427 | 2427 | ||
2428 | static inline void unlock_task_sighand(struct task_struct *tsk, | 2428 | static inline void unlock_task_sighand(struct task_struct *tsk, |
2429 | unsigned long *flags) | 2429 | unsigned long *flags) |
2430 | { | 2430 | { |
2431 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); | 2431 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); |
2432 | } | 2432 | } |
2433 | 2433 | ||
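Unlike a plain spin_lock_irqsave() on ->siglock, lock_task_sighand() can fail: __lock_task_sighand() returns NULL once the target has gone through __exit_signal() and its ->sighand pointer has been cleared. Callers therefore check the return value, roughly as in this illustrative sketch:

        unsigned long flags;
        struct sighand_struct *sighand;

        sighand = lock_task_sighand(tsk, &flags);
        if (sighand) {
                /* tsk->sighand and tsk->signal are pinned here */
                unlock_task_sighand(tsk, &flags);
        }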
2434 | #ifdef CONFIG_CGROUPS | 2434 | #ifdef CONFIG_CGROUPS |
2435 | static inline void threadgroup_change_begin(struct task_struct *tsk) | 2435 | static inline void threadgroup_change_begin(struct task_struct *tsk) |
2436 | { | 2436 | { |
2437 | down_read(&tsk->signal->group_rwsem); | 2437 | down_read(&tsk->signal->group_rwsem); |
2438 | } | 2438 | } |
2439 | static inline void threadgroup_change_end(struct task_struct *tsk) | 2439 | static inline void threadgroup_change_end(struct task_struct *tsk) |
2440 | { | 2440 | { |
2441 | up_read(&tsk->signal->group_rwsem); | 2441 | up_read(&tsk->signal->group_rwsem); |
2442 | } | 2442 | } |
2443 | 2443 | ||
2444 | /** | 2444 | /** |
2445 | * threadgroup_lock - lock threadgroup | 2445 | * threadgroup_lock - lock threadgroup |
2446 | * @tsk: member task of the threadgroup to lock | 2446 | * @tsk: member task of the threadgroup to lock |
2447 | * | 2447 | * |
2448 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | 2448 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter |
2449 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | 2449 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or |
2450 | * perform exec. This is useful for cases where the threadgroup needs to | 2450 | * perform exec. This is useful for cases where the threadgroup needs to |
2451 | * stay stable across blockable operations. | 2451 | * stay stable across blockable operations. |
2452 | * | 2452 | * |
2453 | * fork and exit paths explicitly call threadgroup_change_{begin|end}() for | 2453 | * fork and exit paths explicitly call threadgroup_change_{begin|end}() for |
2454 | * synchronization. While held, no new task will be added to threadgroup | 2454 | * synchronization. While held, no new task will be added to threadgroup |
2455 | * and no existing live task will have its PF_EXITING set. | 2455 | * and no existing live task will have its PF_EXITING set. |
2456 | * | 2456 | * |
2457 | * During exec, a task goes and puts its thread group through unusual | 2457 | * During exec, a task goes and puts its thread group through unusual |
2458 | * changes. After de-threading, exclusive access is assumed to resources | 2458 | * changes. After de-threading, exclusive access is assumed to resources |
2459 | * which are usually shared by tasks in the same group - e.g. sighand may | 2459 | * which are usually shared by tasks in the same group - e.g. sighand may |
2460 | * be replaced with a new one. Also, the exec'ing task takes over group | 2460 | * be replaced with a new one. Also, the exec'ing task takes over group |
2461 | * leader role including its pid. Exclude these changes while locked by | 2461 | * leader role including its pid. Exclude these changes while locked by |
2462 | * grabbing cred_guard_mutex which is used to synchronize exec path. | 2462 | * grabbing cred_guard_mutex which is used to synchronize exec path. |
2463 | */ | 2463 | */ |
2464 | static inline void threadgroup_lock(struct task_struct *tsk) | 2464 | static inline void threadgroup_lock(struct task_struct *tsk) |
2465 | { | 2465 | { |
2466 | /* | 2466 | /* |
2467 | * exec uses exit for de-threading, nesting group_rwsem inside | 2467 | * exec uses exit for de-threading, nesting group_rwsem inside |
2468 | * cred_guard_mutex. Grab cred_guard_mutex first. | 2468 | * cred_guard_mutex. Grab cred_guard_mutex first. |
2469 | */ | 2469 | */ |
2470 | mutex_lock(&tsk->signal->cred_guard_mutex); | 2470 | mutex_lock(&tsk->signal->cred_guard_mutex); |
2471 | down_write(&tsk->signal->group_rwsem); | 2471 | down_write(&tsk->signal->group_rwsem); |
2472 | } | 2472 | } |
2473 | 2473 | ||
2474 | /** | 2474 | /** |
2475 | * threadgroup_unlock - unlock threadgroup | 2475 | * threadgroup_unlock - unlock threadgroup |
2476 | * @tsk: member task of the threadgroup to unlock | 2476 | * @tsk: member task of the threadgroup to unlock |
2477 | * | 2477 | * |
2478 | * Reverse threadgroup_lock(). | 2478 | * Reverse threadgroup_lock(). |
2479 | */ | 2479 | */ |
2480 | static inline void threadgroup_unlock(struct task_struct *tsk) | 2480 | static inline void threadgroup_unlock(struct task_struct *tsk) |
2481 | { | 2481 | { |
2482 | up_write(&tsk->signal->group_rwsem); | 2482 | up_write(&tsk->signal->group_rwsem); |
2483 | mutex_unlock(&tsk->signal->cred_guard_mutex); | 2483 | mutex_unlock(&tsk->signal->cred_guard_mutex); |
2484 | } | 2484 | } |
2485 | #else | 2485 | #else |
2486 | static inline void threadgroup_change_begin(struct task_struct *tsk) {} | 2486 | static inline void threadgroup_change_begin(struct task_struct *tsk) {} |
2487 | static inline void threadgroup_change_end(struct task_struct *tsk) {} | 2487 | static inline void threadgroup_change_end(struct task_struct *tsk) {} |
2488 | static inline void threadgroup_lock(struct task_struct *tsk) {} | 2488 | static inline void threadgroup_lock(struct task_struct *tsk) {} |
2489 | static inline void threadgroup_unlock(struct task_struct *tsk) {} | 2489 | static inline void threadgroup_unlock(struct task_struct *tsk) {} |
2490 | #endif | 2490 | #endif |
2491 | 2491 | ||
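A sketch of how threadgroup_lock() is meant to be used, e.g. by a controller that needs a stable view of a thread group across an operation that may sleep; the body is illustrative, and on !CONFIG_CGROUPS all four helpers above compile away to no-ops:

        threadgroup_lock(tsk);
        /*
         * No new thread can be added to tsk's group, no live thread can
         * set PF_EXITING, and exec's de_thread() is excluded via
         * cred_guard_mutex, so a group walk here sees a stable picture
         * even if it blocks.
         */
        threadgroup_unlock(tsk);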
2492 | #ifndef __HAVE_THREAD_FUNCTIONS | 2492 | #ifndef __HAVE_THREAD_FUNCTIONS |
2493 | 2493 | ||
2494 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) | 2494 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) |
2495 | #define task_stack_page(task) ((task)->stack) | 2495 | #define task_stack_page(task) ((task)->stack) |
2496 | 2496 | ||
2497 | static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) | 2497 | static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) |
2498 | { | 2498 | { |
2499 | *task_thread_info(p) = *task_thread_info(org); | 2499 | *task_thread_info(p) = *task_thread_info(org); |
2500 | task_thread_info(p)->task = p; | 2500 | task_thread_info(p)->task = p; |
2501 | } | 2501 | } |
2502 | 2502 | ||
2503 | static inline unsigned long *end_of_stack(struct task_struct *p) | 2503 | static inline unsigned long *end_of_stack(struct task_struct *p) |
2504 | { | 2504 | { |
2505 | return (unsigned long *)(task_thread_info(p) + 1); | 2505 | return (unsigned long *)(task_thread_info(p) + 1); |
2506 | } | 2506 | } |
2507 | 2507 | ||
2508 | #endif | 2508 | #endif |
2509 | 2509 | ||
2510 | static inline int object_is_on_stack(void *obj) | 2510 | static inline int object_is_on_stack(void *obj) |
2511 | { | 2511 | { |
2512 | void *stack = task_stack_page(current); | 2512 | void *stack = task_stack_page(current); |
2513 | 2513 | ||
2514 | return (obj >= stack) && (obj < (stack + THREAD_SIZE)); | 2514 | return (obj >= stack) && (obj < (stack + THREAD_SIZE)); |
2515 | } | 2515 | } |
2516 | 2516 | ||
2517 | extern void thread_info_cache_init(void); | 2517 | extern void thread_info_cache_init(void); |
2518 | 2518 | ||
2519 | #ifdef CONFIG_DEBUG_STACK_USAGE | 2519 | #ifdef CONFIG_DEBUG_STACK_USAGE |
2520 | static inline unsigned long stack_not_used(struct task_struct *p) | 2520 | static inline unsigned long stack_not_used(struct task_struct *p) |
2521 | { | 2521 | { |
2522 | unsigned long *n = end_of_stack(p); | 2522 | unsigned long *n = end_of_stack(p); |
2523 | 2523 | ||
2524 | do { /* Skip over canary */ | 2524 | do { /* Skip over canary */ |
2525 | n++; | 2525 | n++; |
2526 | } while (!*n); | 2526 | } while (!*n); |
2527 | 2527 | ||
2528 | return (unsigned long)n - (unsigned long)end_of_stack(p); | 2528 | return (unsigned long)n - (unsigned long)end_of_stack(p); |
2529 | } | 2529 | } |
2530 | #endif | 2530 | #endif |
2531 | 2531 | ||
2532 | /* set thread flags in other task's structures | 2532 | /* set thread flags in other task's structures |
2533 | * - see asm/thread_info.h for TIF_xxxx flags available | 2533 | * - see asm/thread_info.h for TIF_xxxx flags available |
2534 | */ | 2534 | */ |
2535 | static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) | 2535 | static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) |
2536 | { | 2536 | { |
2537 | set_ti_thread_flag(task_thread_info(tsk), flag); | 2537 | set_ti_thread_flag(task_thread_info(tsk), flag); |
2538 | } | 2538 | } |
2539 | 2539 | ||
2540 | static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) | 2540 | static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) |
2541 | { | 2541 | { |
2542 | clear_ti_thread_flag(task_thread_info(tsk), flag); | 2542 | clear_ti_thread_flag(task_thread_info(tsk), flag); |
2543 | } | 2543 | } |
2544 | 2544 | ||
2545 | static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) | 2545 | static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) |
2546 | { | 2546 | { |
2547 | return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); | 2547 | return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); |
2548 | } | 2548 | } |
2549 | 2549 | ||
2550 | static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) | 2550 | static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) |
2551 | { | 2551 | { |
2552 | return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); | 2552 | return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); |
2553 | } | 2553 | } |
2554 | 2554 | ||
2555 | static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) | 2555 | static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) |
2556 | { | 2556 | { |
2557 | return test_ti_thread_flag(task_thread_info(tsk), flag); | 2557 | return test_ti_thread_flag(task_thread_info(tsk), flag); |
2558 | } | 2558 | } |
2559 | 2559 | ||
2560 | static inline void set_tsk_need_resched(struct task_struct *tsk) | 2560 | static inline void set_tsk_need_resched(struct task_struct *tsk) |
2561 | { | 2561 | { |
2562 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); | 2562 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); |
2563 | } | 2563 | } |
2564 | 2564 | ||
2565 | static inline void clear_tsk_need_resched(struct task_struct *tsk) | 2565 | static inline void clear_tsk_need_resched(struct task_struct *tsk) |
2566 | { | 2566 | { |
2567 | clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); | 2567 | clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); |
2568 | } | 2568 | } |
2569 | 2569 | ||
2570 | static inline int test_tsk_need_resched(struct task_struct *tsk) | 2570 | static inline int test_tsk_need_resched(struct task_struct *tsk) |
2571 | { | 2571 | { |
2572 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); | 2572 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); |
2573 | } | 2573 | } |
2574 | 2574 | ||
2575 | static inline int restart_syscall(void) | 2575 | static inline int restart_syscall(void) |
2576 | { | 2576 | { |
2577 | set_tsk_thread_flag(current, TIF_SIGPENDING); | 2577 | set_tsk_thread_flag(current, TIF_SIGPENDING); |
2578 | return -ERESTARTNOINTR; | 2578 | return -ERESTARTNOINTR; |
2579 | } | 2579 | } |
2580 | 2580 | ||
2581 | static inline int signal_pending(struct task_struct *p) | 2581 | static inline int signal_pending(struct task_struct *p) |
2582 | { | 2582 | { |
2583 | return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); | 2583 | return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); |
2584 | } | 2584 | } |
2585 | 2585 | ||
2586 | static inline int __fatal_signal_pending(struct task_struct *p) | 2586 | static inline int __fatal_signal_pending(struct task_struct *p) |
2587 | { | 2587 | { |
2588 | return unlikely(sigismember(&p->pending.signal, SIGKILL)); | 2588 | return unlikely(sigismember(&p->pending.signal, SIGKILL)); |
2589 | } | 2589 | } |
2590 | 2590 | ||
2591 | static inline int fatal_signal_pending(struct task_struct *p) | 2591 | static inline int fatal_signal_pending(struct task_struct *p) |
2592 | { | 2592 | { |
2593 | return signal_pending(p) && __fatal_signal_pending(p); | 2593 | return signal_pending(p) && __fatal_signal_pending(p); |
2594 | } | 2594 | } |
2595 | 2595 | ||
2596 | static inline int signal_pending_state(long state, struct task_struct *p) | 2596 | static inline int signal_pending_state(long state, struct task_struct *p) |
2597 | { | 2597 | { |
2598 | if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) | 2598 | if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) |
2599 | return 0; | 2599 | return 0; |
2600 | if (!signal_pending(p)) | 2600 | if (!signal_pending(p)) |
2601 | return 0; | 2601 | return 0; |
2602 | 2602 | ||
2603 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); | 2603 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); |
2604 | } | 2604 | } |
2605 | 2605 | ||
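The signal_pending() family is what long-running kernel loops poll so that userspace signals, and SIGKILL in particular, are honoured promptly. A typical shape, sketched with illustrative names (done, the error codes chosen and the unit of work are placeholders):

        while (!done) {
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (signal_pending(current))
                        return -ERESTARTSYS;
                /* do one bounded unit of work, possibly sleeping */
        }

signal_pending_state() folds the same checks behind the task state: a TASK_INTERRUPTIBLE sleeper is woken by any pending signal, while a TASK_WAKEKILL-only sleeper is woken only by a fatal one.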
2606 | static inline int need_resched(void) | 2606 | static inline int need_resched(void) |
2607 | { | 2607 | { |
2608 | return unlikely(test_thread_flag(TIF_NEED_RESCHED)); | 2608 | return unlikely(test_thread_flag(TIF_NEED_RESCHED)); |
2609 | } | 2609 | } |
2610 | 2610 | ||
2611 | /* | 2611 | /* |
2612 | * cond_resched() and cond_resched_lock(): latency reduction via | 2612 | * cond_resched() and cond_resched_lock(): latency reduction via |
2613 | * explicit rescheduling in places that are safe. The return | 2613 | * explicit rescheduling in places that are safe. The return |
2614 | * value indicates whether a reschedule was done in fact. | 2614 | * value indicates whether a reschedule was done in fact. |
2615 | * cond_resched_lock() will drop the spinlock before scheduling, | 2615 | * cond_resched_lock() will drop the spinlock before scheduling, |
2616 | * cond_resched_softirq() will enable bhs before scheduling. | 2616 | * cond_resched_softirq() will enable bhs before scheduling. |
2617 | */ | 2617 | */ |
2618 | extern int _cond_resched(void); | 2618 | extern int _cond_resched(void); |
2619 | 2619 | ||
2620 | #define cond_resched() ({ \ | 2620 | #define cond_resched() ({ \ |
2621 | __might_sleep(__FILE__, __LINE__, 0); \ | 2621 | __might_sleep(__FILE__, __LINE__, 0); \ |
2622 | _cond_resched(); \ | 2622 | _cond_resched(); \ |
2623 | }) | 2623 | }) |
2624 | 2624 | ||
2625 | extern int __cond_resched_lock(spinlock_t *lock); | 2625 | extern int __cond_resched_lock(spinlock_t *lock); |
2626 | 2626 | ||
2627 | #ifdef CONFIG_PREEMPT_COUNT | 2627 | #ifdef CONFIG_PREEMPT_COUNT |
2628 | #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET | 2628 | #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET |
2629 | #else | 2629 | #else |
2630 | #define PREEMPT_LOCK_OFFSET 0 | 2630 | #define PREEMPT_LOCK_OFFSET 0 |
2631 | #endif | 2631 | #endif |
2632 | 2632 | ||
2633 | #define cond_resched_lock(lock) ({ \ | 2633 | #define cond_resched_lock(lock) ({ \ |
2634 | __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ | 2634 | __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ |
2635 | __cond_resched_lock(lock); \ | 2635 | __cond_resched_lock(lock); \ |
2636 | }) | 2636 | }) |
2637 | 2637 | ||
2638 | extern int __cond_resched_softirq(void); | 2638 | extern int __cond_resched_softirq(void); |
2639 | 2639 | ||
2640 | #define cond_resched_softirq() ({ \ | 2640 | #define cond_resched_softirq() ({ \ |
2641 | __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | 2641 | __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ |
2642 | __cond_resched_softirq(); \ | 2642 | __cond_resched_softirq(); \ |
2643 | }) | 2643 | }) |
2644 | 2644 | ||
2645 | /* | 2645 | /* |
2646 | * Does a critical section need to be broken due to another | 2646 | * Does a critical section need to be broken due to another |
2647 | * task waiting?: (technically does not depend on CONFIG_PREEMPT, | 2647 | * task waiting?: (technically does not depend on CONFIG_PREEMPT, |
2648 | * but a general need for low latency) | 2648 | * but a general need for low latency) |
2649 | */ | 2649 | */ |
2650 | static inline int spin_needbreak(spinlock_t *lock) | 2650 | static inline int spin_needbreak(spinlock_t *lock) |
2651 | { | 2651 | { |
2652 | #ifdef CONFIG_PREEMPT | 2652 | #ifdef CONFIG_PREEMPT |
2653 | return spin_is_contended(lock); | 2653 | return spin_is_contended(lock); |
2654 | #else | 2654 | #else |
2655 | return 0; | 2655 | return 0; |
2656 | #endif | 2656 | #endif |
2657 | } | 2657 | } |
2658 | 2658 | ||
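Taken together, the comment above describes how long kernel loops stay preemption-friendly: call cond_resched() when no locks are held, and cond_resched_lock() when a spinlock is; the latter drops and re-takes the lock when there is a reason to, with spin_needbreak() detecting contention. A hedged sketch, where lock, nr_items and the work helpers are illustrative:

        /* No locks held: just offer the CPU periodically. */
        for (i = 0; i < nr_items; i++) {
                process_item(i);
                cond_resched();
        }

        /* Holding a spinlock: let waiters in without open-coding unlock/lock. */
        spin_lock(&lock);
        for (i = 0; i < nr_items; i++) {
                process_locked_item(i);
                if (cond_resched_lock(&lock)) {
                        /* the lock was dropped; revalidate anything cached */
                }
        }
        spin_unlock(&lock);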
2659 | /* | 2659 | /* |
2660 | * Thread group CPU time accounting. | 2660 | * Thread group CPU time accounting. |
2661 | */ | 2661 | */ |
2662 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); | 2662 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); |
2663 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); | 2663 | void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); |
2664 | 2664 | ||
2665 | static inline void thread_group_cputime_init(struct signal_struct *sig) | 2665 | static inline void thread_group_cputime_init(struct signal_struct *sig) |
2666 | { | 2666 | { |
2667 | raw_spin_lock_init(&sig->cputimer.lock); | 2667 | raw_spin_lock_init(&sig->cputimer.lock); |
2668 | } | 2668 | } |
2669 | 2669 | ||
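A sketch of consuming the accumulator API declared just above (tsk and the use of the fields are illustrative; struct task_cputime is defined earlier in this header):

        struct task_cputime times;

        thread_group_cputime(tsk, &times);
        /*
         * times.utime, times.stime and times.sum_exec_runtime now hold
         * the summed totals for every thread in tsk's group.
         */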
2670 | /* | 2670 | /* |
2671 | * Reevaluate whether the task has signals pending delivery. | 2671 | * Reevaluate whether the task has signals pending delivery. |
2672 | * Wake the task if so. | 2672 | * Wake the task if so. |
2673 | * This is required every time the blocked sigset_t changes. | 2673 | * This is required every time the blocked sigset_t changes. |
2674 | * callers must hold sighand->siglock. | 2674 | * callers must hold sighand->siglock. |
2675 | */ | 2675 | */ |
2676 | extern void recalc_sigpending_and_wake(struct task_struct *t); | 2676 | extern void recalc_sigpending_and_wake(struct task_struct *t); |
2677 | extern void recalc_sigpending(void); | 2677 | extern void recalc_sigpending(void); |
2678 | 2678 | ||
2679 | extern void signal_wake_up(struct task_struct *t, int resume_stopped); | 2679 | extern void signal_wake_up(struct task_struct *t, int resume_stopped); |
2680 | 2680 | ||
2681 | /* | 2681 | /* |
2682 | * Wrappers for p->thread_info->cpu access. No-op on UP. | 2682 | * Wrappers for p->thread_info->cpu access. No-op on UP. |
2683 | */ | 2683 | */ |
2684 | #ifdef CONFIG_SMP | 2684 | #ifdef CONFIG_SMP |
2685 | 2685 | ||
2686 | static inline unsigned int task_cpu(const struct task_struct *p) | 2686 | static inline unsigned int task_cpu(const struct task_struct *p) |
2687 | { | 2687 | { |
2688 | return task_thread_info(p)->cpu; | 2688 | return task_thread_info(p)->cpu; |
2689 | } | 2689 | } |
2690 | 2690 | ||
2691 | extern void set_task_cpu(struct task_struct *p, unsigned int cpu); | 2691 | extern void set_task_cpu(struct task_struct *p, unsigned int cpu); |
2692 | 2692 | ||
2693 | #else | 2693 | #else |
2694 | 2694 | ||
2695 | static inline unsigned int task_cpu(const struct task_struct *p) | 2695 | static inline unsigned int task_cpu(const struct task_struct *p) |
2696 | { | 2696 | { |
2697 | return 0; | 2697 | return 0; |
2698 | } | 2698 | } |
2699 | 2699 | ||
2700 | static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) | 2700 | static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) |
2701 | { | 2701 | { |
2702 | } | 2702 | } |
2703 | 2703 | ||
2704 | #endif /* CONFIG_SMP */ | 2704 | #endif /* CONFIG_SMP */ |
2705 | 2705 | ||
2706 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); | 2706 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); |
2707 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | 2707 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); |
2708 | 2708 | ||
2709 | extern void normalize_rt_tasks(void); | 2709 | extern void normalize_rt_tasks(void); |
2710 | 2710 | ||
2711 | #ifdef CONFIG_CGROUP_SCHED | 2711 | #ifdef CONFIG_CGROUP_SCHED |
2712 | 2712 | ||
2713 | extern struct task_group root_task_group; | 2713 | extern struct task_group root_task_group; |
2714 | 2714 | ||
2715 | extern struct task_group *sched_create_group(struct task_group *parent); | 2715 | extern struct task_group *sched_create_group(struct task_group *parent); |
2716 | extern void sched_destroy_group(struct task_group *tg); | 2716 | extern void sched_destroy_group(struct task_group *tg); |
2717 | extern void sched_move_task(struct task_struct *tsk); | 2717 | extern void sched_move_task(struct task_struct *tsk); |
2718 | #ifdef CONFIG_FAIR_GROUP_SCHED | 2718 | #ifdef CONFIG_FAIR_GROUP_SCHED |
2719 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); | 2719 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); |
2720 | extern unsigned long sched_group_shares(struct task_group *tg); | 2720 | extern unsigned long sched_group_shares(struct task_group *tg); |
2721 | #endif | 2721 | #endif |
2722 | #ifdef CONFIG_RT_GROUP_SCHED | 2722 | #ifdef CONFIG_RT_GROUP_SCHED |
2723 | extern int sched_group_set_rt_runtime(struct task_group *tg, | 2723 | extern int sched_group_set_rt_runtime(struct task_group *tg, |
2724 | long rt_runtime_us); | 2724 | long rt_runtime_us); |
2725 | extern long sched_group_rt_runtime(struct task_group *tg); | 2725 | extern long sched_group_rt_runtime(struct task_group *tg); |
2726 | extern int sched_group_set_rt_period(struct task_group *tg, | 2726 | extern int sched_group_set_rt_period(struct task_group *tg, |
2727 | long rt_period_us); | 2727 | long rt_period_us); |
2728 | extern long sched_group_rt_period(struct task_group *tg); | 2728 | extern long sched_group_rt_period(struct task_group *tg); |
2729 | extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); | 2729 | extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); |
2730 | #endif | 2730 | #endif |
2731 | #endif /* CONFIG_CGROUP_SCHED */ | 2731 | #endif /* CONFIG_CGROUP_SCHED */ |
2732 | 2732 | ||
2733 | extern int task_can_switch_user(struct user_struct *up, | 2733 | extern int task_can_switch_user(struct user_struct *up, |
2734 | struct task_struct *tsk); | 2734 | struct task_struct *tsk); |
2735 | 2735 | ||
2736 | #ifdef CONFIG_TASK_XACCT | 2736 | #ifdef CONFIG_TASK_XACCT |
2737 | static inline void add_rchar(struct task_struct *tsk, ssize_t amt) | 2737 | static inline void add_rchar(struct task_struct *tsk, ssize_t amt) |
2738 | { | 2738 | { |
2739 | tsk->ioac.rchar += amt; | 2739 | tsk->ioac.rchar += amt; |
2740 | } | 2740 | } |
2741 | 2741 | ||
2742 | static inline void add_wchar(struct task_struct *tsk, ssize_t amt) | 2742 | static inline void add_wchar(struct task_struct *tsk, ssize_t amt) |
2743 | { | 2743 | { |
2744 | tsk->ioac.wchar += amt; | 2744 | tsk->ioac.wchar += amt; |
2745 | } | 2745 | } |
2746 | 2746 | ||
2747 | static inline void inc_syscr(struct task_struct *tsk) | 2747 | static inline void inc_syscr(struct task_struct *tsk) |
2748 | { | 2748 | { |
2749 | tsk->ioac.syscr++; | 2749 | tsk->ioac.syscr++; |
2750 | } | 2750 | } |
2751 | 2751 | ||
2752 | static inline void inc_syscw(struct task_struct *tsk) | 2752 | static inline void inc_syscw(struct task_struct *tsk) |
2753 | { | 2753 | { |
2754 | tsk->ioac.syscw++; | 2754 | tsk->ioac.syscw++; |
2755 | } | 2755 | } |
2756 | #else | 2756 | #else |
2757 | static inline void add_rchar(struct task_struct *tsk, ssize_t amt) | 2757 | static inline void add_rchar(struct task_struct *tsk, ssize_t amt) |
2758 | { | 2758 | { |
2759 | } | 2759 | } |
2760 | 2760 | ||
2761 | static inline void add_wchar(struct task_struct *tsk, ssize_t amt) | 2761 | static inline void add_wchar(struct task_struct *tsk, ssize_t amt) |
2762 | { | 2762 | { |
2763 | } | 2763 | } |
2764 | 2764 | ||
2765 | static inline void inc_syscr(struct task_struct *tsk) | 2765 | static inline void inc_syscr(struct task_struct *tsk) |
2766 | { | 2766 | { |
2767 | } | 2767 | } |
2768 | 2768 | ||
2769 | static inline void inc_syscw(struct task_struct *tsk) | 2769 | static inline void inc_syscw(struct task_struct *tsk) |
2770 | { | 2770 | { |
2771 | } | 2771 | } |
2772 | #endif | 2772 | #endif |
2773 | 2773 | ||
2774 | #ifndef TASK_SIZE_OF | 2774 | #ifndef TASK_SIZE_OF |
2775 | #define TASK_SIZE_OF(tsk) TASK_SIZE | 2775 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2776 | #endif | 2776 | #endif |
2777 | 2777 | ||
2778 | #ifdef CONFIG_MM_OWNER | 2778 | #ifdef CONFIG_MM_OWNER |
2779 | extern void mm_update_next_owner(struct mm_struct *mm); | 2779 | extern void mm_update_next_owner(struct mm_struct *mm); |
2780 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); | 2780 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); |
2781 | #else | 2781 | #else |
2782 | static inline void mm_update_next_owner(struct mm_struct *mm) | 2782 | static inline void mm_update_next_owner(struct mm_struct *mm) |
2783 | { | 2783 | { |
2784 | } | 2784 | } |
2785 | 2785 | ||
2786 | static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) | 2786 | static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) |
2787 | { | 2787 | { |
2788 | } | 2788 | } |
2789 | #endif /* CONFIG_MM_OWNER */ | 2789 | #endif /* CONFIG_MM_OWNER */ |
2790 | 2790 | ||
2791 | static inline unsigned long task_rlimit(const struct task_struct *tsk, | 2791 | static inline unsigned long task_rlimit(const struct task_struct *tsk, |
2792 | unsigned int limit) | 2792 | unsigned int limit) |
2793 | { | 2793 | { |
2794 | return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); | 2794 | return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); |
2795 | } | 2795 | } |
2796 | 2796 | ||
2797 | static inline unsigned long task_rlimit_max(const struct task_struct *tsk, | 2797 | static inline unsigned long task_rlimit_max(const struct task_struct *tsk, |
2798 | unsigned int limit) | 2798 | unsigned int limit) |
2799 | { | 2799 | { |
2800 | return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); | 2800 | return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); |
2801 | } | 2801 | } |
2802 | 2802 | ||
2803 | static inline unsigned long rlimit(unsigned int limit) | 2803 | static inline unsigned long rlimit(unsigned int limit) |
2804 | { | 2804 | { |
2805 | return task_rlimit(current, limit); | 2805 | return task_rlimit(current, limit); |
2806 | } | 2806 | } |
2807 | 2807 | ||
2808 | static inline unsigned long rlimit_max(unsigned int limit) | 2808 | static inline unsigned long rlimit_max(unsigned int limit) |
2809 | { | 2809 | { |
2810 | return task_rlimit_max(current, limit); | 2810 | return task_rlimit_max(current, limit); |
2811 | } | 2811 | } |
2812 | 2812 | ||
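The rlimit helpers above read the limit with ACCESS_ONCE(), i.e. a single lockless snapshot of the value. A hedged sketch of a typical check (RLIMIT_AS is just an example resource; new_size is illustrative):

        unsigned long limit = rlimit(RLIMIT_AS);

        if (limit != RLIM_INFINITY && new_size > limit)
                return -ENOMEM;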
2813 | #endif | 2813 | #endif |
2814 | 2814 |
kernel/exit.c
1 | /* | 1 | /* |
2 | * linux/kernel/exit.c | 2 | * linux/kernel/exit.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
12 | #include <linux/completion.h> | 12 | #include <linux/completion.h> |
13 | #include <linux/personality.h> | 13 | #include <linux/personality.h> |
14 | #include <linux/tty.h> | 14 | #include <linux/tty.h> |
15 | #include <linux/iocontext.h> | 15 | #include <linux/iocontext.h> |
16 | #include <linux/key.h> | 16 | #include <linux/key.h> |
17 | #include <linux/security.h> | 17 | #include <linux/security.h> |
18 | #include <linux/cpu.h> | 18 | #include <linux/cpu.h> |
19 | #include <linux/acct.h> | 19 | #include <linux/acct.h> |
20 | #include <linux/tsacct_kern.h> | 20 | #include <linux/tsacct_kern.h> |
21 | #include <linux/file.h> | 21 | #include <linux/file.h> |
22 | #include <linux/fdtable.h> | 22 | #include <linux/fdtable.h> |
23 | #include <linux/binfmts.h> | 23 | #include <linux/binfmts.h> |
24 | #include <linux/nsproxy.h> | 24 | #include <linux/nsproxy.h> |
25 | #include <linux/pid_namespace.h> | 25 | #include <linux/pid_namespace.h> |
26 | #include <linux/ptrace.h> | 26 | #include <linux/ptrace.h> |
27 | #include <linux/profile.h> | 27 | #include <linux/profile.h> |
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
29 | #include <linux/proc_fs.h> | 29 | #include <linux/proc_fs.h> |
30 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
31 | #include <linux/mempolicy.h> | 31 | #include <linux/mempolicy.h> |
32 | #include <linux/taskstats_kern.h> | 32 | #include <linux/taskstats_kern.h> |
33 | #include <linux/delayacct.h> | 33 | #include <linux/delayacct.h> |
34 | #include <linux/freezer.h> | 34 | #include <linux/freezer.h> |
35 | #include <linux/cgroup.h> | 35 | #include <linux/cgroup.h> |
36 | #include <linux/syscalls.h> | 36 | #include <linux/syscalls.h> |
37 | #include <linux/signal.h> | 37 | #include <linux/signal.h> |
38 | #include <linux/posix-timers.h> | 38 | #include <linux/posix-timers.h> |
39 | #include <linux/cn_proc.h> | 39 | #include <linux/cn_proc.h> |
40 | #include <linux/mutex.h> | 40 | #include <linux/mutex.h> |
41 | #include <linux/futex.h> | 41 | #include <linux/futex.h> |
42 | #include <linux/pipe_fs_i.h> | 42 | #include <linux/pipe_fs_i.h> |
43 | #include <linux/audit.h> /* for audit_free() */ | 43 | #include <linux/audit.h> /* for audit_free() */ |
44 | #include <linux/resource.h> | 44 | #include <linux/resource.h> |
45 | #include <linux/blkdev.h> | 45 | #include <linux/blkdev.h> |
46 | #include <linux/task_io_accounting_ops.h> | 46 | #include <linux/task_io_accounting_ops.h> |
47 | #include <linux/tracehook.h> | 47 | #include <linux/tracehook.h> |
48 | #include <linux/fs_struct.h> | 48 | #include <linux/fs_struct.h> |
49 | #include <linux/init_task.h> | 49 | #include <linux/init_task.h> |
50 | #include <linux/perf_event.h> | 50 | #include <linux/perf_event.h> |
51 | #include <trace/events/sched.h> | 51 | #include <trace/events/sched.h> |
52 | #include <linux/hw_breakpoint.h> | 52 | #include <linux/hw_breakpoint.h> |
53 | #include <linux/oom.h> | 53 | #include <linux/oom.h> |
54 | #include <linux/writeback.h> | 54 | #include <linux/writeback.h> |
55 | #include <linux/shm.h> | 55 | #include <linux/shm.h> |
56 | 56 | ||
57 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
58 | #include <asm/unistd.h> | 58 | #include <asm/unistd.h> |
59 | #include <asm/pgtable.h> | 59 | #include <asm/pgtable.h> |
60 | #include <asm/mmu_context.h> | 60 | #include <asm/mmu_context.h> |
61 | 61 | ||
62 | static void exit_mm(struct task_struct * tsk); | 62 | static void exit_mm(struct task_struct * tsk); |
63 | 63 | ||
64 | static void __unhash_process(struct task_struct *p, bool group_dead) | 64 | static void __unhash_process(struct task_struct *p, bool group_dead) |
65 | { | 65 | { |
66 | nr_threads--; | 66 | nr_threads--; |
67 | detach_pid(p, PIDTYPE_PID); | 67 | detach_pid(p, PIDTYPE_PID); |
68 | if (group_dead) { | 68 | if (group_dead) { |
69 | detach_pid(p, PIDTYPE_PGID); | 69 | detach_pid(p, PIDTYPE_PGID); |
70 | detach_pid(p, PIDTYPE_SID); | 70 | detach_pid(p, PIDTYPE_SID); |
71 | 71 | ||
72 | list_del_rcu(&p->tasks); | 72 | list_del_rcu(&p->tasks); |
73 | list_del_init(&p->sibling); | 73 | list_del_init(&p->sibling); |
74 | __this_cpu_dec(process_counts); | 74 | __this_cpu_dec(process_counts); |
75 | /* | 75 | /* |
76 | * If we are the last child process in a pid namespace to be | 76 | * If we are the last child process in a pid namespace to be |
77 | * reaped, notify the reaper sleeping in zap_pid_ns_processes(). | 77 | * reaped, notify the reaper sleeping in zap_pid_ns_processes(). |
78 | */ | 78 | */ |
79 | if (IS_ENABLED(CONFIG_PID_NS)) { | 79 | if (IS_ENABLED(CONFIG_PID_NS)) { |
80 | struct task_struct *parent = p->real_parent; | 80 | struct task_struct *parent = p->real_parent; |
81 | 81 | ||
82 | if ((task_active_pid_ns(parent)->child_reaper == parent) && | 82 | if ((task_active_pid_ns(parent)->child_reaper == parent) && |
83 | list_empty(&parent->children) && | 83 | list_empty(&parent->children) && |
84 | (parent->flags & PF_EXITING)) | 84 | (parent->flags & PF_EXITING)) |
85 | wake_up_process(parent); | 85 | wake_up_process(parent); |
86 | } | 86 | } |
87 | } | 87 | } |
88 | list_del_rcu(&p->thread_group); | 88 | list_del_rcu(&p->thread_group); |
89 | } | 89 | } |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * This function expects the tasklist_lock write-locked. | 92 | * This function expects the tasklist_lock write-locked. |
93 | */ | 93 | */ |
94 | static void __exit_signal(struct task_struct *tsk) | 94 | static void __exit_signal(struct task_struct *tsk) |
95 | { | 95 | { |
96 | struct signal_struct *sig = tsk->signal; | 96 | struct signal_struct *sig = tsk->signal; |
97 | bool group_dead = thread_group_leader(tsk); | 97 | bool group_dead = thread_group_leader(tsk); |
98 | struct sighand_struct *sighand; | 98 | struct sighand_struct *sighand; |
99 | struct tty_struct *uninitialized_var(tty); | 99 | struct tty_struct *uninitialized_var(tty); |
100 | 100 | ||
101 | sighand = rcu_dereference_check(tsk->sighand, | 101 | sighand = rcu_dereference_check(tsk->sighand, |
102 | lockdep_tasklist_lock_is_held()); | 102 | lockdep_tasklist_lock_is_held()); |
103 | spin_lock(&sighand->siglock); | 103 | spin_lock(&sighand->siglock); |
104 | 104 | ||
105 | posix_cpu_timers_exit(tsk); | 105 | posix_cpu_timers_exit(tsk); |
106 | if (group_dead) { | 106 | if (group_dead) { |
107 | posix_cpu_timers_exit_group(tsk); | 107 | posix_cpu_timers_exit_group(tsk); |
108 | tty = sig->tty; | 108 | tty = sig->tty; |
109 | sig->tty = NULL; | 109 | sig->tty = NULL; |
110 | } else { | 110 | } else { |
111 | /* | 111 | /* |
112 | * This can only happen if the caller is de_thread(). | 112 | * This can only happen if the caller is de_thread(). |
113 | * FIXME: this is the temporary hack, we should teach | 113 | * FIXME: this is the temporary hack, we should teach |
114 | * posix-cpu-timers to handle this case correctly. | 114 | * posix-cpu-timers to handle this case correctly. |
115 | */ | 115 | */ |
116 | if (unlikely(has_group_leader_pid(tsk))) | 116 | if (unlikely(has_group_leader_pid(tsk))) |
117 | posix_cpu_timers_exit_group(tsk); | 117 | posix_cpu_timers_exit_group(tsk); |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * If there is any task waiting for the group exit | 120 | * If there is any task waiting for the group exit |
121 | * then notify it: | 121 | * then notify it: |
122 | */ | 122 | */ |
123 | if (sig->notify_count > 0 && !--sig->notify_count) | 123 | if (sig->notify_count > 0 && !--sig->notify_count) |
124 | wake_up_process(sig->group_exit_task); | 124 | wake_up_process(sig->group_exit_task); |
125 | 125 | ||
126 | if (tsk == sig->curr_target) | 126 | if (tsk == sig->curr_target) |
127 | sig->curr_target = next_thread(tsk); | 127 | sig->curr_target = next_thread(tsk); |
128 | /* | 128 | /* |
129 | * Accumulate here the counters for all threads but the | 129 | * Accumulate here the counters for all threads but the |
130 | * group leader as they die, so they can be added into | 130 | * group leader as they die, so they can be added into |
131 | * the process-wide totals when those are taken. | 131 | * the process-wide totals when those are taken. |
132 | * The group leader stays around as a zombie as long | 132 | * The group leader stays around as a zombie as long |
133 | * as there are other threads. When it gets reaped, | 133 | * as there are other threads. When it gets reaped, |
134 | * the exit.c code will add its counts into these totals. | 134 | * the exit.c code will add its counts into these totals. |
135 | * We won't ever get here for the group leader, since it | 135 | * We won't ever get here for the group leader, since it |
136 | * will have been the last reference on the signal_struct. | 136 | * will have been the last reference on the signal_struct. |
137 | */ | 137 | */ |
138 | sig->utime += tsk->utime; | 138 | sig->utime += tsk->utime; |
139 | sig->stime += tsk->stime; | 139 | sig->stime += tsk->stime; |
140 | sig->gtime += tsk->gtime; | 140 | sig->gtime += tsk->gtime; |
141 | sig->min_flt += tsk->min_flt; | 141 | sig->min_flt += tsk->min_flt; |
142 | sig->maj_flt += tsk->maj_flt; | 142 | sig->maj_flt += tsk->maj_flt; |
143 | sig->nvcsw += tsk->nvcsw; | 143 | sig->nvcsw += tsk->nvcsw; |
144 | sig->nivcsw += tsk->nivcsw; | 144 | sig->nivcsw += tsk->nivcsw; |
145 | sig->inblock += task_io_get_inblock(tsk); | 145 | sig->inblock += task_io_get_inblock(tsk); |
146 | sig->oublock += task_io_get_oublock(tsk); | 146 | sig->oublock += task_io_get_oublock(tsk); |
147 | task_io_accounting_add(&sig->ioac, &tsk->ioac); | 147 | task_io_accounting_add(&sig->ioac, &tsk->ioac); |
148 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; | 148 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; |
149 | } | 149 | } |
150 | 150 | ||
151 | sig->nr_threads--; | 151 | sig->nr_threads--; |
152 | __unhash_process(tsk, group_dead); | 152 | __unhash_process(tsk, group_dead); |
153 | 153 | ||
154 | /* | 154 | /* |
155 | * Do this under ->siglock, we can race with another thread | 155 | * Do this under ->siglock, we can race with another thread |
156 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | 156 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. |
157 | */ | 157 | */ |
158 | flush_sigqueue(&tsk->pending); | 158 | flush_sigqueue(&tsk->pending); |
159 | tsk->sighand = NULL; | 159 | tsk->sighand = NULL; |
160 | spin_unlock(&sighand->siglock); | 160 | spin_unlock(&sighand->siglock); |
161 | 161 | ||
162 | __cleanup_sighand(sighand); | 162 | __cleanup_sighand(sighand); |
163 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | 163 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); |
164 | if (group_dead) { | 164 | if (group_dead) { |
165 | flush_sigqueue(&sig->shared_pending); | 165 | flush_sigqueue(&sig->shared_pending); |
166 | tty_kref_put(tty); | 166 | tty_kref_put(tty); |
167 | } | 167 | } |
168 | } | 168 | } |
169 | 169 | ||
170 | static void delayed_put_task_struct(struct rcu_head *rhp) | 170 | static void delayed_put_task_struct(struct rcu_head *rhp) |
171 | { | 171 | { |
172 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | 172 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
173 | 173 | ||
174 | perf_event_delayed_put(tsk); | 174 | perf_event_delayed_put(tsk); |
175 | trace_sched_process_free(tsk); | 175 | trace_sched_process_free(tsk); |
176 | put_task_struct(tsk); | 176 | put_task_struct(tsk); |
177 | } | 177 | } |
178 | 178 | ||
179 | 179 | ||
180 | void release_task(struct task_struct * p) | 180 | void release_task(struct task_struct * p) |
181 | { | 181 | { |
182 | struct task_struct *leader; | 182 | struct task_struct *leader; |
183 | int zap_leader; | 183 | int zap_leader; |
184 | repeat: | 184 | repeat: |
185 | /* don't need to get the RCU readlock here - the process is dead and | 185 | /* don't need to get the RCU readlock here - the process is dead and |
186 | * can't be modifying its own credentials. But shut RCU-lockdep up */ | 186 | * can't be modifying its own credentials. But shut RCU-lockdep up */ |
187 | rcu_read_lock(); | 187 | rcu_read_lock(); |
188 | atomic_dec(&__task_cred(p)->user->processes); | 188 | atomic_dec(&__task_cred(p)->user->processes); |
189 | rcu_read_unlock(); | 189 | rcu_read_unlock(); |
190 | 190 | ||
191 | proc_flush_task(p); | 191 | proc_flush_task(p); |
192 | 192 | ||
193 | write_lock_irq(&tasklist_lock); | 193 | write_lock_irq(&tasklist_lock); |
194 | ptrace_release_task(p); | 194 | ptrace_release_task(p); |
195 | __exit_signal(p); | 195 | __exit_signal(p); |
196 | 196 | ||
197 | /* | 197 | /* |
198 | * If we are the last non-leader member of the thread | 198 | * If we are the last non-leader member of the thread |
199 | * group, and the leader is zombie, then notify the | 199 | * group, and the leader is zombie, then notify the |
200 | * group leader's parent process. (if it wants notification.) | 200 | * group leader's parent process. (if it wants notification.) |
201 | */ | 201 | */ |
202 | zap_leader = 0; | 202 | zap_leader = 0; |
203 | leader = p->group_leader; | 203 | leader = p->group_leader; |
204 | if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { | 204 | if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { |
205 | /* | 205 | /* |
206 | * If we were the last child thread and the leader has | 206 | * If we were the last child thread and the leader has |
207 | * exited already, and the leader's parent ignores SIGCHLD, | 207 | * exited already, and the leader's parent ignores SIGCHLD, |
208 | * then we are the one who should release the leader. | 208 | * then we are the one who should release the leader. |
209 | */ | 209 | */ |
210 | zap_leader = do_notify_parent(leader, leader->exit_signal); | 210 | zap_leader = do_notify_parent(leader, leader->exit_signal); |
211 | if (zap_leader) | 211 | if (zap_leader) |
212 | leader->exit_state = EXIT_DEAD; | 212 | leader->exit_state = EXIT_DEAD; |
213 | } | 213 | } |
214 | 214 | ||
215 | write_unlock_irq(&tasklist_lock); | 215 | write_unlock_irq(&tasklist_lock); |
216 | release_thread(p); | 216 | release_thread(p); |
217 | call_rcu(&p->rcu, delayed_put_task_struct); | 217 | call_rcu(&p->rcu, delayed_put_task_struct); |
218 | 218 | ||
219 | p = leader; | 219 | p = leader; |
220 | if (unlikely(zap_leader)) | 220 | if (unlikely(zap_leader)) |
221 | goto repeat; | 221 | goto repeat; |
222 | } | 222 | } |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * This checks not only the pgrp, but falls back on the pid if no | 225 | * This checks not only the pgrp, but falls back on the pid if no |
226 | * satisfactory pgrp is found. I dunno - gdb doesn't work correctly | 226 | * satisfactory pgrp is found. I dunno - gdb doesn't work correctly |
227 | * without this... | 227 | * without this... |
228 | * | 228 | * |
229 | * The caller must hold rcu lock or the tasklist lock. | 229 | * The caller must hold rcu lock or the tasklist lock. |
230 | */ | 230 | */ |
231 | struct pid *session_of_pgrp(struct pid *pgrp) | 231 | struct pid *session_of_pgrp(struct pid *pgrp) |
232 | { | 232 | { |
233 | struct task_struct *p; | 233 | struct task_struct *p; |
234 | struct pid *sid = NULL; | 234 | struct pid *sid = NULL; |
235 | 235 | ||
236 | p = pid_task(pgrp, PIDTYPE_PGID); | 236 | p = pid_task(pgrp, PIDTYPE_PGID); |
237 | if (p == NULL) | 237 | if (p == NULL) |
238 | p = pid_task(pgrp, PIDTYPE_PID); | 238 | p = pid_task(pgrp, PIDTYPE_PID); |
239 | if (p != NULL) | 239 | if (p != NULL) |
240 | sid = task_session(p); | 240 | sid = task_session(p); |
241 | 241 | ||
242 | return sid; | 242 | return sid; |
243 | } | 243 | } |
244 | 244 | ||
245 | /* | 245 | /* |
246 | * Determine if a process group is "orphaned", according to the POSIX | 246 | * Determine if a process group is "orphaned", according to the POSIX |
247 | * definition in 2.2.2.52. Orphaned process groups are not to be affected | 247 | * definition in 2.2.2.52. Orphaned process groups are not to be affected |
248 | * by terminal-generated stop signals. Newly orphaned process groups are | 248 | * by terminal-generated stop signals. Newly orphaned process groups are |
249 | * to receive a SIGHUP and a SIGCONT. | 249 | * to receive a SIGHUP and a SIGCONT. |
250 | * | 250 | * |
251 | * "I ask you, have you ever known what it is to be an orphan?" | 251 | * "I ask you, have you ever known what it is to be an orphan?" |
252 | */ | 252 | */ |
253 | static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) | 253 | static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task) |
254 | { | 254 | { |
255 | struct task_struct *p; | 255 | struct task_struct *p; |
256 | 256 | ||
257 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 257 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { |
258 | if ((p == ignored_task) || | 258 | if ((p == ignored_task) || |
259 | (p->exit_state && thread_group_empty(p)) || | 259 | (p->exit_state && thread_group_empty(p)) || |
260 | is_global_init(p->real_parent)) | 260 | is_global_init(p->real_parent)) |
261 | continue; | 261 | continue; |
262 | 262 | ||
263 | if (task_pgrp(p->real_parent) != pgrp && | 263 | if (task_pgrp(p->real_parent) != pgrp && |
264 | task_session(p->real_parent) == task_session(p)) | 264 | task_session(p->real_parent) == task_session(p)) |
265 | return 0; | 265 | return 0; |
266 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 266 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); |
267 | 267 | ||
268 | return 1; | 268 | return 1; |
269 | } | 269 | } |
270 | 270 | ||
271 | int is_current_pgrp_orphaned(void) | 271 | int is_current_pgrp_orphaned(void) |
272 | { | 272 | { |
273 | int retval; | 273 | int retval; |
274 | 274 | ||
275 | read_lock(&tasklist_lock); | 275 | read_lock(&tasklist_lock); |
276 | retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); | 276 | retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); |
277 | read_unlock(&tasklist_lock); | 277 | read_unlock(&tasklist_lock); |
278 | 278 | ||
279 | return retval; | 279 | return retval; |
280 | } | 280 | } |
281 | 281 | ||
282 | static bool has_stopped_jobs(struct pid *pgrp) | 282 | static bool has_stopped_jobs(struct pid *pgrp) |
283 | { | 283 | { |
284 | struct task_struct *p; | 284 | struct task_struct *p; |
285 | 285 | ||
286 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 286 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { |
287 | if (p->signal->flags & SIGNAL_STOP_STOPPED) | 287 | if (p->signal->flags & SIGNAL_STOP_STOPPED) |
288 | return true; | 288 | return true; |
289 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 289 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); |
290 | 290 | ||
291 | return false; | 291 | return false; |
292 | } | 292 | } |
293 | 293 | ||
294 | /* | 294 | /* |
295 | * Check to see if any process groups have become orphaned as | 295 | * Check to see if any process groups have become orphaned as |
296 | * a result of our exiting, and if they have any stopped jobs, | 296 | * a result of our exiting, and if they have any stopped jobs, |
297 | * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | 297 | * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) |
298 | */ | 298 | */ |
299 | static void | 299 | static void |
300 | kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) | 300 | kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) |
301 | { | 301 | { |
302 | struct pid *pgrp = task_pgrp(tsk); | 302 | struct pid *pgrp = task_pgrp(tsk); |
303 | struct task_struct *ignored_task = tsk; | 303 | struct task_struct *ignored_task = tsk; |
304 | 304 | ||
305 | if (!parent) | 305 | if (!parent) |
306 | /* exit: our father is in a different pgrp than | 306 | /* exit: our father is in a different pgrp than |
307 | * we are and we were the only connection outside. | 307 | * we are and we were the only connection outside. |
308 | */ | 308 | */ |
309 | parent = tsk->real_parent; | 309 | parent = tsk->real_parent; |
310 | else | 310 | else |
311 | /* reparent: our child is in a different pgrp than | 311 | /* reparent: our child is in a different pgrp than |
312 | * we are, and it was the only connection outside. | 312 | * we are, and it was the only connection outside. |
313 | */ | 313 | */ |
314 | ignored_task = NULL; | 314 | ignored_task = NULL; |
315 | 315 | ||
316 | if (task_pgrp(parent) != pgrp && | 316 | if (task_pgrp(parent) != pgrp && |
317 | task_session(parent) == task_session(tsk) && | 317 | task_session(parent) == task_session(tsk) && |
318 | will_become_orphaned_pgrp(pgrp, ignored_task) && | 318 | will_become_orphaned_pgrp(pgrp, ignored_task) && |
319 | has_stopped_jobs(pgrp)) { | 319 | has_stopped_jobs(pgrp)) { |
320 | __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); | 320 | __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); |
321 | __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); | 321 | __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); |
322 | } | 322 | } |
323 | } | 323 | } |
324 | 324 | ||
325 | /** | 325 | /** |
326 | * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd | 326 | * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd |
327 | * | 327 | * |
328 | * If a kernel thread is launched as a result of a system call, or if | 328 | * If a kernel thread is launched as a result of a system call, or if |
329 | * it ever exits, it should generally reparent itself to kthreadd so it | 329 | * it ever exits, it should generally reparent itself to kthreadd so it |
330 | * isn't in the way of other processes and is correctly cleaned up on exit. | 330 | * isn't in the way of other processes and is correctly cleaned up on exit. |
331 | * | 331 | * |
332 | * The various task state such as scheduling policy and priority may have | 332 | * The various task state such as scheduling policy and priority may have |
333 | * been inherited from a user process, so we reset them to sane values here. | 333 | * been inherited from a user process, so we reset them to sane values here. |
334 | * | 334 | * |
335 | * NOTE that reparent_to_kthreadd() gives the caller full capabilities. | 335 | * NOTE that reparent_to_kthreadd() gives the caller full capabilities. |
336 | */ | 336 | */ |
337 | static void reparent_to_kthreadd(void) | 337 | static void reparent_to_kthreadd(void) |
338 | { | 338 | { |
339 | write_lock_irq(&tasklist_lock); | 339 | write_lock_irq(&tasklist_lock); |
340 | 340 | ||
341 | ptrace_unlink(current); | 341 | ptrace_unlink(current); |
342 | /* Reparent to init */ | 342 | /* Reparent to init */ |
343 | current->real_parent = current->parent = kthreadd_task; | 343 | current->real_parent = current->parent = kthreadd_task; |
344 | list_move_tail(&current->sibling, &current->real_parent->children); | 344 | list_move_tail(&current->sibling, &current->real_parent->children); |
345 | 345 | ||
346 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 346 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
347 | current->exit_signal = SIGCHLD; | 347 | current->exit_signal = SIGCHLD; |
348 | 348 | ||
349 | if (task_nice(current) < 0) | 349 | if (task_nice(current) < 0) |
350 | set_user_nice(current, 0); | 350 | set_user_nice(current, 0); |
351 | /* cpus_allowed? */ | 351 | /* cpus_allowed? */ |
352 | /* rt_priority? */ | 352 | /* rt_priority? */ |
353 | /* signals? */ | 353 | /* signals? */ |
354 | memcpy(current->signal->rlim, init_task.signal->rlim, | 354 | memcpy(current->signal->rlim, init_task.signal->rlim, |
355 | sizeof(current->signal->rlim)); | 355 | sizeof(current->signal->rlim)); |
356 | 356 | ||
357 | atomic_inc(&init_cred.usage); | 357 | atomic_inc(&init_cred.usage); |
358 | commit_creds(&init_cred); | 358 | commit_creds(&init_cred); |
359 | write_unlock_irq(&tasklist_lock); | 359 | write_unlock_irq(&tasklist_lock); |
360 | } | 360 | } |
361 | 361 | ||
362 | void __set_special_pids(struct pid *pid) | 362 | void __set_special_pids(struct pid *pid) |
363 | { | 363 | { |
364 | struct task_struct *curr = current->group_leader; | 364 | struct task_struct *curr = current->group_leader; |
365 | 365 | ||
366 | if (task_session(curr) != pid) | 366 | if (task_session(curr) != pid) |
367 | change_pid(curr, PIDTYPE_SID, pid); | 367 | change_pid(curr, PIDTYPE_SID, pid); |
368 | 368 | ||
369 | if (task_pgrp(curr) != pid) | 369 | if (task_pgrp(curr) != pid) |
370 | change_pid(curr, PIDTYPE_PGID, pid); | 370 | change_pid(curr, PIDTYPE_PGID, pid); |
371 | } | 371 | } |
372 | 372 | ||
373 | static void set_special_pids(struct pid *pid) | 373 | static void set_special_pids(struct pid *pid) |
374 | { | 374 | { |
375 | write_lock_irq(&tasklist_lock); | 375 | write_lock_irq(&tasklist_lock); |
376 | __set_special_pids(pid); | 376 | __set_special_pids(pid); |
377 | write_unlock_irq(&tasklist_lock); | 377 | write_unlock_irq(&tasklist_lock); |
378 | } | 378 | } |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * Let kernel threads use this to say that they allow a certain signal. | 381 | * Let kernel threads use this to say that they allow a certain signal. |
382 | * Must not be used if kthread was cloned with CLONE_SIGHAND. | 382 | * Must not be used if kthread was cloned with CLONE_SIGHAND. |
383 | */ | 383 | */ |
384 | int allow_signal(int sig) | 384 | int allow_signal(int sig) |
385 | { | 385 | { |
386 | if (!valid_signal(sig) || sig < 1) | 386 | if (!valid_signal(sig) || sig < 1) |
387 | return -EINVAL; | 387 | return -EINVAL; |
388 | 388 | ||
389 | spin_lock_irq(&current->sighand->siglock); | 389 | spin_lock_irq(&current->sighand->siglock); |
390 | /* This is only needed for daemonize()'ed kthreads */ | 390 | /* This is only needed for daemonize()'ed kthreads */ |
391 | sigdelset(¤t->blocked, sig); | 391 | sigdelset(¤t->blocked, sig); |
392 | /* | 392 | /* |
393 | * Kernel threads handle their own signals. Let the signal code | 393 | * Kernel threads handle their own signals. Let the signal code |
394 | * know it'll be handled, so that they don't get converted to | 394 | * know it'll be handled, so that they don't get converted to |
395 | * SIGKILL or just silently dropped. | 395 | * SIGKILL or just silently dropped. |
396 | */ | 396 | */ |
397 | current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; | 397 | current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; |
398 | recalc_sigpending(); | 398 | recalc_sigpending(); |
399 | spin_unlock_irq(&current->sighand->siglock); | 399 | spin_unlock_irq(&current->sighand->siglock); |
400 | return 0; | 400 | return 0; |
401 | } | 401 | } |
402 | 402 | ||
403 | EXPORT_SYMBOL(allow_signal); | 403 | EXPORT_SYMBOL(allow_signal); |
404 | 404 | ||
405 | int disallow_signal(int sig) | 405 | int disallow_signal(int sig) |
406 | { | 406 | { |
407 | if (!valid_signal(sig) || sig < 1) | 407 | if (!valid_signal(sig) || sig < 1) |
408 | return -EINVAL; | 408 | return -EINVAL; |
409 | 409 | ||
410 | spin_lock_irq(&current->sighand->siglock); | 410 | spin_lock_irq(&current->sighand->siglock); |
411 | current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN; | 411 | current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN; |
412 | recalc_sigpending(); | 412 | recalc_sigpending(); |
413 | spin_unlock_irq(&current->sighand->siglock); | 413 | spin_unlock_irq(&current->sighand->siglock); |
414 | return 0; | 414 | return 0; |
415 | } | 415 | } |
416 | 416 | ||
417 | EXPORT_SYMBOL(disallow_signal); | 417 | EXPORT_SYMBOL(disallow_signal); |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * Put all the gunge required to become a kernel thread without | 420 | * Put all the gunge required to become a kernel thread without |
421 | * attached user resources in one place where it belongs. | 421 | * attached user resources in one place where it belongs. |
422 | */ | 422 | */ |
423 | 423 | ||
424 | void daemonize(const char *name, ...) | 424 | void daemonize(const char *name, ...) |
425 | { | 425 | { |
426 | va_list args; | 426 | va_list args; |
427 | sigset_t blocked; | 427 | sigset_t blocked; |
428 | 428 | ||
429 | va_start(args, name); | 429 | va_start(args, name); |
430 | vsnprintf(current->comm, sizeof(current->comm), name, args); | 430 | vsnprintf(current->comm, sizeof(current->comm), name, args); |
431 | va_end(args); | 431 | va_end(args); |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * If we were started as result of loading a module, close all of the | 434 | * If we were started as result of loading a module, close all of the |
435 | * user space pages. We don't need them, and if we didn't close them | 435 | * user space pages. We don't need them, and if we didn't close them |
436 | * they would be locked into memory. | 436 | * they would be locked into memory. |
437 | */ | 437 | */ |
438 | exit_mm(current); | 438 | exit_mm(current); |
439 | /* | 439 | /* |
440 | * We don't want to get frozen, in case system-wide hibernation | 440 | * We don't want to get frozen, in case system-wide hibernation |
441 | * or suspend transition begins right now. | 441 | * or suspend transition begins right now. |
442 | */ | 442 | */ |
443 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); | 443 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); |
444 | 444 | ||
445 | if (current->nsproxy != &init_nsproxy) { | 445 | if (current->nsproxy != &init_nsproxy) { |
446 | get_nsproxy(&init_nsproxy); | 446 | get_nsproxy(&init_nsproxy); |
447 | switch_task_namespaces(current, &init_nsproxy); | 447 | switch_task_namespaces(current, &init_nsproxy); |
448 | } | 448 | } |
449 | set_special_pids(&init_struct_pid); | 449 | set_special_pids(&init_struct_pid); |
450 | proc_clear_tty(current); | 450 | proc_clear_tty(current); |
451 | 451 | ||
452 | /* Block and flush all signals */ | 452 | /* Block and flush all signals */ |
453 | sigfillset(&blocked); | 453 | sigfillset(&blocked); |
454 | sigprocmask(SIG_BLOCK, &blocked, NULL); | 454 | sigprocmask(SIG_BLOCK, &blocked, NULL); |
455 | flush_signals(current); | 455 | flush_signals(current); |
456 | 456 | ||
457 | /* Become as one with the init task */ | 457 | /* Become as one with the init task */ |
458 | 458 | ||
459 | daemonize_fs_struct(); | 459 | daemonize_fs_struct(); |
460 | daemonize_descriptors(); | 460 | daemonize_descriptors(); |
461 | 461 | ||
462 | reparent_to_kthreadd(); | 462 | reparent_to_kthreadd(); |
463 | } | 463 | } |
464 | 464 | ||
465 | EXPORT_SYMBOL(daemonize); | 465 | EXPORT_SYMBOL(daemonize); |
466 | 466 | ||
467 | #ifdef CONFIG_MM_OWNER | 467 | #ifdef CONFIG_MM_OWNER |
468 | /* | 468 | /* |
469 | * A task is exiting. If it owned this mm, find a new owner for the mm. | 469 | * A task is exiting. If it owned this mm, find a new owner for the mm. |
470 | */ | 470 | */ |
471 | void mm_update_next_owner(struct mm_struct *mm) | 471 | void mm_update_next_owner(struct mm_struct *mm) |
472 | { | 472 | { |
473 | struct task_struct *c, *g, *p = current; | 473 | struct task_struct *c, *g, *p = current; |
474 | 474 | ||
475 | retry: | 475 | retry: |
476 | /* | 476 | /* |
477 | * If the exiting or execing task is not the owner, it's | 477 | * If the exiting or execing task is not the owner, it's |
478 | * someone else's problem. | 478 | * someone else's problem. |
479 | */ | 479 | */ |
480 | if (mm->owner != p) | 480 | if (mm->owner != p) |
481 | return; | 481 | return; |
482 | /* | 482 | /* |
483 | * The current owner is exiting/execing and there are no other | 483 | * The current owner is exiting/execing and there are no other |
484 | * candidates. Do not leave the mm pointing to a possibly | 484 | * candidates. Do not leave the mm pointing to a possibly |
485 | * freed task structure. | 485 | * freed task structure. |
486 | */ | 486 | */ |
487 | if (atomic_read(&mm->mm_users) <= 1) { | 487 | if (atomic_read(&mm->mm_users) <= 1) { |
488 | mm->owner = NULL; | 488 | mm->owner = NULL; |
489 | return; | 489 | return; |
490 | } | 490 | } |
491 | 491 | ||
492 | read_lock(&tasklist_lock); | 492 | read_lock(&tasklist_lock); |
493 | /* | 493 | /* |
494 | * Search in the children | 494 | * Search in the children |
495 | */ | 495 | */ |
496 | list_for_each_entry(c, &p->children, sibling) { | 496 | list_for_each_entry(c, &p->children, sibling) { |
497 | if (c->mm == mm) | 497 | if (c->mm == mm) |
498 | goto assign_new_owner; | 498 | goto assign_new_owner; |
499 | } | 499 | } |
500 | 500 | ||
501 | /* | 501 | /* |
502 | * Search in the siblings | 502 | * Search in the siblings |
503 | */ | 503 | */ |
504 | list_for_each_entry(c, &p->real_parent->children, sibling) { | 504 | list_for_each_entry(c, &p->real_parent->children, sibling) { |
505 | if (c->mm == mm) | 505 | if (c->mm == mm) |
506 | goto assign_new_owner; | 506 | goto assign_new_owner; |
507 | } | 507 | } |
508 | 508 | ||
509 | /* | 509 | /* |
510 | * Search through everything else. We should not get | 510 | * Search through everything else. We should not get |
511 | * here often | 511 | * here often |
512 | */ | 512 | */ |
513 | do_each_thread(g, c) { | 513 | do_each_thread(g, c) { |
514 | if (c->mm == mm) | 514 | if (c->mm == mm) |
515 | goto assign_new_owner; | 515 | goto assign_new_owner; |
516 | } while_each_thread(g, c); | 516 | } while_each_thread(g, c); |
517 | 517 | ||
518 | read_unlock(&tasklist_lock); | 518 | read_unlock(&tasklist_lock); |
519 | /* | 519 | /* |
520 | * We found no owner yet mm_users > 1: this implies that we are | 520 | * We found no owner yet mm_users > 1: this implies that we are |
521 | * most likely racing with swapoff (try_to_unuse()) or /proc or | 521 | * most likely racing with swapoff (try_to_unuse()) or /proc or |
522 | * ptrace or page migration (get_task_mm()). Mark owner as NULL. | 522 | * ptrace or page migration (get_task_mm()). Mark owner as NULL. |
523 | */ | 523 | */ |
524 | mm->owner = NULL; | 524 | mm->owner = NULL; |
525 | return; | 525 | return; |
526 | 526 | ||
527 | assign_new_owner: | 527 | assign_new_owner: |
528 | BUG_ON(c == p); | 528 | BUG_ON(c == p); |
529 | get_task_struct(c); | 529 | get_task_struct(c); |
530 | /* | 530 | /* |
531 | * The task_lock protects c->mm from changing. | 531 | * The task_lock protects c->mm from changing. |
532 | * We always want mm->owner->mm == mm | 532 | * We always want mm->owner->mm == mm |
533 | */ | 533 | */ |
534 | task_lock(c); | 534 | task_lock(c); |
535 | /* | 535 | /* |
536 | * Delay read_unlock() till we have the task_lock() | 536 | * Delay read_unlock() till we have the task_lock() |
537 | * to ensure that c does not slip away underneath us | 537 | * to ensure that c does not slip away underneath us |
538 | */ | 538 | */ |
539 | read_unlock(&tasklist_lock); | 539 | read_unlock(&tasklist_lock); |
540 | if (c->mm != mm) { | 540 | if (c->mm != mm) { |
541 | task_unlock(c); | 541 | task_unlock(c); |
542 | put_task_struct(c); | 542 | put_task_struct(c); |
543 | goto retry; | 543 | goto retry; |
544 | } | 544 | } |
545 | mm->owner = c; | 545 | mm->owner = c; |
546 | task_unlock(c); | 546 | task_unlock(c); |
547 | put_task_struct(c); | 547 | put_task_struct(c); |
548 | } | 548 | } |
549 | #endif /* CONFIG_MM_OWNER */ | 549 | #endif /* CONFIG_MM_OWNER */ |
550 | 550 | ||
551 | /* | 551 | /* |
552 | * Turn us into a lazy TLB process if we | 552 | * Turn us into a lazy TLB process if we |
553 | * aren't already.. | 553 | * aren't already.. |
554 | */ | 554 | */ |
555 | static void exit_mm(struct task_struct * tsk) | 555 | static void exit_mm(struct task_struct * tsk) |
556 | { | 556 | { |
557 | struct mm_struct *mm = tsk->mm; | 557 | struct mm_struct *mm = tsk->mm; |
558 | struct core_state *core_state; | 558 | struct core_state *core_state; |
559 | 559 | ||
560 | mm_release(tsk, mm); | 560 | mm_release(tsk, mm); |
561 | if (!mm) | 561 | if (!mm) |
562 | return; | 562 | return; |
563 | sync_mm_rss(mm); | 563 | sync_mm_rss(mm); |
564 | /* | 564 | /* |
565 | * Serialize with any possible pending coredump. | 565 | * Serialize with any possible pending coredump. |
566 | * We must hold mmap_sem around checking core_state | 566 | * We must hold mmap_sem around checking core_state |
567 | * and clearing tsk->mm. The core-inducing thread | 567 | * and clearing tsk->mm. The core-inducing thread |
568 | * will increment ->nr_threads for each thread in the | 568 | * will increment ->nr_threads for each thread in the |
569 | * group with ->mm != NULL. | 569 | * group with ->mm != NULL. |
570 | */ | 570 | */ |
571 | down_read(&mm->mmap_sem); | 571 | down_read(&mm->mmap_sem); |
572 | core_state = mm->core_state; | 572 | core_state = mm->core_state; |
573 | if (core_state) { | 573 | if (core_state) { |
574 | struct core_thread self; | 574 | struct core_thread self; |
575 | up_read(&mm->mmap_sem); | 575 | up_read(&mm->mmap_sem); |
576 | 576 | ||
577 | self.task = tsk; | 577 | self.task = tsk; |
578 | self.next = xchg(&core_state->dumper.next, &self); | 578 | self.next = xchg(&core_state->dumper.next, &self); |
579 | /* | 579 | /* |
580 | * Implies mb(), the result of xchg() must be visible | 580 | * Implies mb(), the result of xchg() must be visible |
581 | * to core_state->dumper. | 581 | * to core_state->dumper. |
582 | */ | 582 | */ |
583 | if (atomic_dec_and_test(&core_state->nr_threads)) | 583 | if (atomic_dec_and_test(&core_state->nr_threads)) |
584 | complete(&core_state->startup); | 584 | complete(&core_state->startup); |
585 | 585 | ||
586 | for (;;) { | 586 | for (;;) { |
587 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 587 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); |
588 | if (!self.task) /* see coredump_finish() */ | 588 | if (!self.task) /* see coredump_finish() */ |
589 | break; | 589 | break; |
590 | schedule(); | 590 | schedule(); |
591 | } | 591 | } |
592 | __set_task_state(tsk, TASK_RUNNING); | 592 | __set_task_state(tsk, TASK_RUNNING); |
593 | down_read(&mm->mmap_sem); | 593 | down_read(&mm->mmap_sem); |
594 | } | 594 | } |
595 | atomic_inc(&mm->mm_count); | 595 | atomic_inc(&mm->mm_count); |
596 | BUG_ON(mm != tsk->active_mm); | 596 | BUG_ON(mm != tsk->active_mm); |
597 | /* more a memory barrier than a real lock */ | 597 | /* more a memory barrier than a real lock */ |
598 | task_lock(tsk); | 598 | task_lock(tsk); |
599 | tsk->mm = NULL; | 599 | tsk->mm = NULL; |
600 | up_read(&mm->mmap_sem); | 600 | up_read(&mm->mmap_sem); |
601 | enter_lazy_tlb(mm, current); | 601 | enter_lazy_tlb(mm, current); |
602 | task_unlock(tsk); | 602 | task_unlock(tsk); |
603 | mm_update_next_owner(mm); | 603 | mm_update_next_owner(mm); |
604 | mmput(mm); | 604 | mmput(mm); |
605 | } | 605 | } |
606 | 606 | ||
607 | /* | 607 | /* |
608 | * When we die, we re-parent all our children, and try to: | 608 | * When we die, we re-parent all our children, and try to: |
609 | * 1. give them to another thread in our thread group, if such a member exists | 609 | * 1. give them to another thread in our thread group, if such a member exists |
610 | * 2. give it to the first ancestor process which prctl'd itself as a | 610 | * 2. give it to the first ancestor process which prctl'd itself as a |
611 | * child_subreaper for its children (like a service manager) | 611 | * child_subreaper for its children (like a service manager) |
612 | * 3. give it to the init process (PID 1) in our pid namespace | 612 | * 3. give it to the init process (PID 1) in our pid namespace |
613 | */ | 613 | */ |
614 | static struct task_struct *find_new_reaper(struct task_struct *father) | 614 | static struct task_struct *find_new_reaper(struct task_struct *father) |
615 | __releases(&tasklist_lock) | 615 | __releases(&tasklist_lock) |
616 | __acquires(&tasklist_lock) | 616 | __acquires(&tasklist_lock) |
617 | { | 617 | { |
618 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 618 | struct pid_namespace *pid_ns = task_active_pid_ns(father); |
619 | struct task_struct *thread; | 619 | struct task_struct *thread; |
620 | 620 | ||
621 | thread = father; | 621 | thread = father; |
622 | while_each_thread(father, thread) { | 622 | while_each_thread(father, thread) { |
623 | if (thread->flags & PF_EXITING) | 623 | if (thread->flags & PF_EXITING) |
624 | continue; | 624 | continue; |
625 | if (unlikely(pid_ns->child_reaper == father)) | 625 | if (unlikely(pid_ns->child_reaper == father)) |
626 | pid_ns->child_reaper = thread; | 626 | pid_ns->child_reaper = thread; |
627 | return thread; | 627 | return thread; |
628 | } | 628 | } |
629 | 629 | ||
630 | if (unlikely(pid_ns->child_reaper == father)) { | 630 | if (unlikely(pid_ns->child_reaper == father)) { |
631 | write_unlock_irq(&tasklist_lock); | 631 | write_unlock_irq(&tasklist_lock); |
632 | if (unlikely(pid_ns == &init_pid_ns)) { | 632 | if (unlikely(pid_ns == &init_pid_ns)) { |
633 | panic("Attempted to kill init! exitcode=0x%08x\n", | 633 | panic("Attempted to kill init! exitcode=0x%08x\n", |
634 | father->signal->group_exit_code ?: | 634 | father->signal->group_exit_code ?: |
635 | father->exit_code); | 635 | father->exit_code); |
636 | } | 636 | } |
637 | 637 | ||
638 | zap_pid_ns_processes(pid_ns); | 638 | zap_pid_ns_processes(pid_ns); |
639 | write_lock_irq(&tasklist_lock); | 639 | write_lock_irq(&tasklist_lock); |
640 | } else if (father->signal->has_child_subreaper) { | 640 | } else if (father->signal->has_child_subreaper) { |
641 | struct task_struct *reaper; | 641 | struct task_struct *reaper; |
642 | 642 | ||
643 | /* | 643 | /* |
644 | * Find the first ancestor marked as child_subreaper. | 644 | * Find the first ancestor marked as child_subreaper. |
645 | * Note that the code below checks same_thread_group(reaper, | 645 | * Note that the code below checks same_thread_group(reaper, |
646 | * pid_ns->child_reaper). This is what we need to DTRT in a | 646 | * pid_ns->child_reaper). This is what we need to DTRT in a |
647 | * PID namespace. However we still need the check above, see | 647 | * PID namespace. However we still need the check above, see |
648 | * http://marc.info/?l=linux-kernel&m=131385460420380 | 648 | * http://marc.info/?l=linux-kernel&m=131385460420380 |
649 | */ | 649 | */ |
650 | for (reaper = father->real_parent; | 650 | for (reaper = father->real_parent; |
651 | reaper != &init_task; | 651 | reaper != &init_task; |
652 | reaper = reaper->real_parent) { | 652 | reaper = reaper->real_parent) { |
653 | if (same_thread_group(reaper, pid_ns->child_reaper)) | 653 | if (same_thread_group(reaper, pid_ns->child_reaper)) |
654 | break; | 654 | break; |
655 | if (!reaper->signal->is_child_subreaper) | 655 | if (!reaper->signal->is_child_subreaper) |
656 | continue; | 656 | continue; |
657 | thread = reaper; | 657 | thread = reaper; |
658 | do { | 658 | do { |
659 | if (!(thread->flags & PF_EXITING)) | 659 | if (!(thread->flags & PF_EXITING)) |
660 | return reaper; | 660 | return reaper; |
661 | } while_each_thread(reaper, thread); | 661 | } while_each_thread(reaper, thread); |
662 | } | 662 | } |
663 | } | 663 | } |
664 | 664 | ||
665 | return pid_ns->child_reaper; | 665 | return pid_ns->child_reaper; |
666 | } | 666 | } |
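find_new_reaper() is where PR_SET_CHILD_SUBREAPER becomes visible to user space: orphaned descendants are handed to the nearest live subreaper ancestor instead of init. A small user-space demonstration, assuming Linux 3.4+ headers and with error handling omitted for brevity:

#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/wait.h>

int main(void)
{
	prctl(PR_SET_CHILD_SUBREAPER, 1);	/* mark this process as a subreaper */

	pid_t child = fork();
	if (child == 0) {
		if (fork() == 0) {		/* grandchild */
			sleep(1);		/* outlive the middle process */
			printf("grandchild ppid: %d (the subreaper)\n", getppid());
			_exit(0);
		}
		_exit(0);			/* orphan the grandchild */
	}
	waitpid(child, NULL, 0);		/* reap the middle process */
	sleep(2);
	wait(NULL);				/* the orphan was reparented to us; reap it */
	return 0;
}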
667 | 667 | ||
668 | /* | 668 | /* |
669 | * Any that need to be release_task'd are put on the @dead list. | 669 | * Any that need to be release_task'd are put on the @dead list. |
670 | */ | 670 | */ |
671 | static void reparent_leader(struct task_struct *father, struct task_struct *p, | 671 | static void reparent_leader(struct task_struct *father, struct task_struct *p, |
672 | struct list_head *dead) | 672 | struct list_head *dead) |
673 | { | 673 | { |
674 | list_move_tail(&p->sibling, &p->real_parent->children); | 674 | list_move_tail(&p->sibling, &p->real_parent->children); |
675 | 675 | ||
676 | if (p->exit_state == EXIT_DEAD) | 676 | if (p->exit_state == EXIT_DEAD) |
677 | return; | 677 | return; |
678 | /* | 678 | /* |
679 | * If this is a threaded reparent there is no need to | 679 | * If this is a threaded reparent there is no need to |
680 | * notify anyone anything has happened. | 680 | * notify anyone anything has happened. |
681 | */ | 681 | */ |
682 | if (same_thread_group(p->real_parent, father)) | 682 | if (same_thread_group(p->real_parent, father)) |
683 | return; | 683 | return; |
684 | 684 | ||
685 | /* We don't want people slaying init. */ | 685 | /* We don't want people slaying init. */ |
686 | p->exit_signal = SIGCHLD; | 686 | p->exit_signal = SIGCHLD; |
687 | 687 | ||
688 | /* If it has exited notify the new parent about this child's death. */ | 688 | /* If it has exited notify the new parent about this child's death. */ |
689 | if (!p->ptrace && | 689 | if (!p->ptrace && |
690 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { | 690 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
691 | if (do_notify_parent(p, p->exit_signal)) { | 691 | if (do_notify_parent(p, p->exit_signal)) { |
692 | p->exit_state = EXIT_DEAD; | 692 | p->exit_state = EXIT_DEAD; |
693 | list_move_tail(&p->sibling, dead); | 693 | list_move_tail(&p->sibling, dead); |
694 | } | 694 | } |
695 | } | 695 | } |
696 | 696 | ||
697 | kill_orphaned_pgrp(p, father); | 697 | kill_orphaned_pgrp(p, father); |
698 | } | 698 | } |
699 | 699 | ||
700 | static void forget_original_parent(struct task_struct *father) | 700 | static void forget_original_parent(struct task_struct *father) |
701 | { | 701 | { |
702 | struct task_struct *p, *n, *reaper; | 702 | struct task_struct *p, *n, *reaper; |
703 | LIST_HEAD(dead_children); | 703 | LIST_HEAD(dead_children); |
704 | 704 | ||
705 | write_lock_irq(&tasklist_lock); | 705 | write_lock_irq(&tasklist_lock); |
706 | /* | 706 | /* |
707 | * Note that exit_ptrace() and find_new_reaper() might | 707 | * Note that exit_ptrace() and find_new_reaper() might |
708 | * drop tasklist_lock and reacquire it. | 708 | * drop tasklist_lock and reacquire it. |
709 | */ | 709 | */ |
710 | exit_ptrace(father); | 710 | exit_ptrace(father); |
711 | reaper = find_new_reaper(father); | 711 | reaper = find_new_reaper(father); |
712 | 712 | ||
713 | list_for_each_entry_safe(p, n, &father->children, sibling) { | 713 | list_for_each_entry_safe(p, n, &father->children, sibling) { |
714 | struct task_struct *t = p; | 714 | struct task_struct *t = p; |
715 | do { | 715 | do { |
716 | t->real_parent = reaper; | 716 | t->real_parent = reaper; |
717 | if (t->parent == father) { | 717 | if (t->parent == father) { |
718 | BUG_ON(t->ptrace); | 718 | BUG_ON(t->ptrace); |
719 | t->parent = t->real_parent; | 719 | t->parent = t->real_parent; |
720 | } | 720 | } |
721 | if (t->pdeath_signal) | 721 | if (t->pdeath_signal) |
722 | group_send_sig_info(t->pdeath_signal, | 722 | group_send_sig_info(t->pdeath_signal, |
723 | SEND_SIG_NOINFO, t); | 723 | SEND_SIG_NOINFO, t); |
724 | } while_each_thread(p, t); | 724 | } while_each_thread(p, t); |
725 | reparent_leader(father, p, &dead_children); | 725 | reparent_leader(father, p, &dead_children); |
726 | } | 726 | } |
727 | write_unlock_irq(&tasklist_lock); | 727 | write_unlock_irq(&tasklist_lock); |
728 | 728 | ||
729 | BUG_ON(!list_empty(&father->children)); | 729 | BUG_ON(!list_empty(&father->children)); |
730 | 730 | ||
731 | list_for_each_entry_safe(p, n, &dead_children, sibling) { | 731 | list_for_each_entry_safe(p, n, &dead_children, sibling) { |
732 | list_del_init(&p->sibling); | 732 | list_del_init(&p->sibling); |
733 | release_task(p); | 733 | release_task(p); |
734 | } | 734 | } |
735 | } | 735 | } |
736 | 736 | ||
737 | /* | 737 | /* |
738 | * Send signals to all our closest relatives so that they know | 738 | * Send signals to all our closest relatives so that they know |
739 | * to properly mourn us.. | 739 | * to properly mourn us.. |
740 | */ | 740 | */ |
741 | static void exit_notify(struct task_struct *tsk, int group_dead) | 741 | static void exit_notify(struct task_struct *tsk, int group_dead) |
742 | { | 742 | { |
743 | bool autoreap; | 743 | bool autoreap; |
744 | 744 | ||
745 | /* | 745 | /* |
746 | * This does two things: | 746 | * This does two things: |
747 | * | 747 | * |
748 | * A. Make init inherit all the child processes | 748 | * A. Make init inherit all the child processes |
749 | * B. Check to see if any process groups have become orphaned | 749 | * B. Check to see if any process groups have become orphaned |
750 | * as a result of our exiting, and if they have any stopped | 750 | * as a result of our exiting, and if they have any stopped |
751 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | 751 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) |
752 | */ | 752 | */ |
753 | forget_original_parent(tsk); | 753 | forget_original_parent(tsk); |
754 | exit_task_namespaces(tsk); | 754 | exit_task_namespaces(tsk); |
755 | 755 | ||
756 | write_lock_irq(&tasklist_lock); | 756 | write_lock_irq(&tasklist_lock); |
757 | if (group_dead) | 757 | if (group_dead) |
758 | kill_orphaned_pgrp(tsk->group_leader, NULL); | 758 | kill_orphaned_pgrp(tsk->group_leader, NULL); |
759 | 759 | ||
760 | if (unlikely(tsk->ptrace)) { | 760 | if (unlikely(tsk->ptrace)) { |
761 | int sig = thread_group_leader(tsk) && | 761 | int sig = thread_group_leader(tsk) && |
762 | thread_group_empty(tsk) && | 762 | thread_group_empty(tsk) && |
763 | !ptrace_reparented(tsk) ? | 763 | !ptrace_reparented(tsk) ? |
764 | tsk->exit_signal : SIGCHLD; | 764 | tsk->exit_signal : SIGCHLD; |
765 | autoreap = do_notify_parent(tsk, sig); | 765 | autoreap = do_notify_parent(tsk, sig); |
766 | } else if (thread_group_leader(tsk)) { | 766 | } else if (thread_group_leader(tsk)) { |
767 | autoreap = thread_group_empty(tsk) && | 767 | autoreap = thread_group_empty(tsk) && |
768 | do_notify_parent(tsk, tsk->exit_signal); | 768 | do_notify_parent(tsk, tsk->exit_signal); |
769 | } else { | 769 | } else { |
770 | autoreap = true; | 770 | autoreap = true; |
771 | } | 771 | } |
772 | 772 | ||
773 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; | 773 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; |
774 | 774 | ||
775 | /* mt-exec, de_thread() is waiting for group leader */ | 775 | /* mt-exec, de_thread() is waiting for group leader */ |
776 | if (unlikely(tsk->signal->notify_count < 0)) | 776 | if (unlikely(tsk->signal->notify_count < 0)) |
777 | wake_up_process(tsk->signal->group_exit_task); | 777 | wake_up_process(tsk->signal->group_exit_task); |
778 | write_unlock_irq(&tasklist_lock); | 778 | write_unlock_irq(&tasklist_lock); |
779 | 779 | ||
780 | /* If the process is dead, release it - nobody will wait for it */ | 780 | /* If the process is dead, release it - nobody will wait for it */ |
781 | if (autoreap) | 781 | if (autoreap) |
782 | release_task(tsk); | 782 | release_task(tsk); |
783 | } | 783 | } |
784 | 784 | ||
785 | #ifdef CONFIG_DEBUG_STACK_USAGE | 785 | #ifdef CONFIG_DEBUG_STACK_USAGE |
786 | static void check_stack_usage(void) | 786 | static void check_stack_usage(void) |
787 | { | 787 | { |
788 | static DEFINE_SPINLOCK(low_water_lock); | 788 | static DEFINE_SPINLOCK(low_water_lock); |
789 | static int lowest_to_date = THREAD_SIZE; | 789 | static int lowest_to_date = THREAD_SIZE; |
790 | unsigned long free; | 790 | unsigned long free; |
791 | 791 | ||
792 | free = stack_not_used(current); | 792 | free = stack_not_used(current); |
793 | 793 | ||
794 | if (free >= lowest_to_date) | 794 | if (free >= lowest_to_date) |
795 | return; | 795 | return; |
796 | 796 | ||
797 | spin_lock(&low_water_lock); | 797 | spin_lock(&low_water_lock); |
798 | if (free < lowest_to_date) { | 798 | if (free < lowest_to_date) { |
799 | printk(KERN_WARNING "%s (%d) used greatest stack depth: " | 799 | printk(KERN_WARNING "%s (%d) used greatest stack depth: " |
800 | "%lu bytes left\n", | 800 | "%lu bytes left\n", |
801 | current->comm, task_pid_nr(current), free); | 801 | current->comm, task_pid_nr(current), free); |
802 | lowest_to_date = free; | 802 | lowest_to_date = free; |
803 | } | 803 | } |
804 | spin_unlock(&low_water_lock); | 804 | spin_unlock(&low_water_lock); |
805 | } | 805 | } |
806 | #else | 806 | #else |
807 | static inline void check_stack_usage(void) {} | 807 | static inline void check_stack_usage(void) {} |
808 | #endif | 808 | #endif |
809 | 809 | ||
810 | void do_exit(long code) | 810 | void do_exit(long code) |
811 | { | 811 | { |
812 | struct task_struct *tsk = current; | 812 | struct task_struct *tsk = current; |
813 | int group_dead; | 813 | int group_dead; |
814 | 814 | ||
815 | profile_task_exit(tsk); | 815 | profile_task_exit(tsk); |
816 | 816 | ||
817 | WARN_ON(blk_needs_flush_plug(tsk)); | 817 | WARN_ON(blk_needs_flush_plug(tsk)); |
818 | 818 | ||
819 | if (unlikely(in_interrupt())) | 819 | if (unlikely(in_interrupt())) |
820 | panic("Aiee, killing interrupt handler!"); | 820 | panic("Aiee, killing interrupt handler!"); |
821 | if (unlikely(!tsk->pid)) | 821 | if (unlikely(!tsk->pid)) |
822 | panic("Attempted to kill the idle task!"); | 822 | panic("Attempted to kill the idle task!"); |
823 | 823 | ||
824 | /* | 824 | /* |
825 | * If do_exit is called because this process oopsed, it's possible | 825 | * If do_exit is called because this process oopsed, it's possible |
826 | * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before | 826 | * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before |
827 | * continuing. Amongst other possible reasons, this is to prevent | 827 | * continuing. Amongst other possible reasons, this is to prevent |
828 | * mm_release()->clear_child_tid() from writing to a user-controlled | 828 | * mm_release()->clear_child_tid() from writing to a user-controlled |
829 | * kernel address. | 829 | * kernel address. |
830 | */ | 830 | */ |
831 | set_fs(USER_DS); | 831 | set_fs(USER_DS); |
832 | 832 | ||
833 | ptrace_event(PTRACE_EVENT_EXIT, code); | 833 | ptrace_event(PTRACE_EVENT_EXIT, code); |
834 | 834 | ||
835 | validate_creds_for_do_exit(tsk); | 835 | validate_creds_for_do_exit(tsk); |
836 | 836 | ||
837 | /* | 837 | /* |
838 | * We're taking recursive faults here in do_exit. Safest is to just | 838 | * We're taking recursive faults here in do_exit. Safest is to just |
839 | * leave this task alone and wait for reboot. | 839 | * leave this task alone and wait for reboot. |
840 | */ | 840 | */ |
841 | if (unlikely(tsk->flags & PF_EXITING)) { | 841 | if (unlikely(tsk->flags & PF_EXITING)) { |
842 | printk(KERN_ALERT | 842 | printk(KERN_ALERT |
843 | "Fixing recursive fault but reboot is needed!\n"); | 843 | "Fixing recursive fault but reboot is needed!\n"); |
844 | /* | 844 | /* |
845 | * We can do this unlocked here. The futex code uses | 845 | * We can do this unlocked here. The futex code uses |
846 | * this flag just to verify whether the pi state | 846 | * this flag just to verify whether the pi state |
847 | * cleanup has been done or not. In the worst case it | 847 | * cleanup has been done or not. In the worst case it |
848 | * loops once more. We pretend that the cleanup was | 848 | * loops once more. We pretend that the cleanup was |
849 | * done as there is no way to return. Either the | 849 | * done as there is no way to return. Either the |
850 | * OWNER_DIED bit is set by now or we push the blocked | 850 | * OWNER_DIED bit is set by now or we push the blocked |
851 | * task into the wait-forever nirvana as well. | 851 | * task into the wait-forever nirvana as well. |
852 | */ | 852 | */ |
853 | tsk->flags |= PF_EXITPIDONE; | 853 | tsk->flags |= PF_EXITPIDONE; |
854 | set_current_state(TASK_UNINTERRUPTIBLE); | 854 | set_current_state(TASK_UNINTERRUPTIBLE); |
855 | schedule(); | 855 | schedule(); |
856 | } | 856 | } |
857 | 857 | ||
858 | exit_signals(tsk); /* sets PF_EXITING */ | 858 | exit_signals(tsk); /* sets PF_EXITING */ |
859 | /* | 859 | /* |
860 | * tsk->flags are checked in the futex code to protect against | 860 | * tsk->flags are checked in the futex code to protect against |
861 | * an exiting task cleaning up the robust pi futexes. | 861 | * an exiting task cleaning up the robust pi futexes. |
862 | */ | 862 | */ |
863 | smp_mb(); | 863 | smp_mb(); |
864 | raw_spin_unlock_wait(&tsk->pi_lock); | 864 | raw_spin_unlock_wait(&tsk->pi_lock); |
865 | 865 | ||
866 | if (unlikely(in_atomic())) | 866 | if (unlikely(in_atomic())) |
867 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", | 867 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", |
868 | current->comm, task_pid_nr(current), | 868 | current->comm, task_pid_nr(current), |
869 | preempt_count()); | 869 | preempt_count()); |
870 | 870 | ||
871 | acct_update_integrals(tsk); | 871 | acct_update_integrals(tsk); |
872 | /* sync mm's RSS info before statistics gathering */ | 872 | /* sync mm's RSS info before statistics gathering */ |
873 | if (tsk->mm) | 873 | if (tsk->mm) |
874 | sync_mm_rss(tsk->mm); | 874 | sync_mm_rss(tsk->mm); |
875 | group_dead = atomic_dec_and_test(&tsk->signal->live); | 875 | group_dead = atomic_dec_and_test(&tsk->signal->live); |
876 | if (group_dead) { | 876 | if (group_dead) { |
877 | hrtimer_cancel(&tsk->signal->real_timer); | 877 | hrtimer_cancel(&tsk->signal->real_timer); |
878 | exit_itimers(tsk->signal); | 878 | exit_itimers(tsk->signal); |
879 | if (tsk->mm) | 879 | if (tsk->mm) |
880 | setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); | 880 | setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); |
881 | } | 881 | } |
882 | acct_collect(code, group_dead); | 882 | acct_collect(code, group_dead); |
883 | if (group_dead) | 883 | if (group_dead) |
884 | tty_audit_exit(); | 884 | tty_audit_exit(); |
885 | audit_free(tsk); | 885 | audit_free(tsk); |
886 | 886 | ||
887 | tsk->exit_code = code; | 887 | tsk->exit_code = code; |
888 | taskstats_exit(tsk, group_dead); | 888 | taskstats_exit(tsk, group_dead); |
889 | 889 | ||
890 | exit_mm(tsk); | 890 | exit_mm(tsk); |
891 | 891 | ||
892 | if (group_dead) | 892 | if (group_dead) |
893 | acct_process(); | 893 | acct_process(); |
894 | trace_sched_process_exit(tsk); | 894 | trace_sched_process_exit(tsk); |
895 | 895 | ||
896 | exit_sem(tsk); | 896 | exit_sem(tsk); |
897 | exit_shm(tsk); | 897 | exit_shm(tsk); |
898 | exit_files(tsk); | 898 | exit_files(tsk); |
899 | exit_fs(tsk); | 899 | exit_fs(tsk); |
900 | exit_task_work(tsk); | 900 | exit_task_work(tsk); |
901 | check_stack_usage(); | 901 | check_stack_usage(); |
902 | exit_thread(); | 902 | exit_thread(); |
903 | 903 | ||
904 | /* | 904 | /* |
905 | * Flush inherited counters to the parent - before the parent | 905 | * Flush inherited counters to the parent - before the parent |
906 | * gets woken up by child-exit notifications. | 906 | * gets woken up by child-exit notifications. |
907 | * | 907 | * |
908 | * because of cgroup mode, must be called before cgroup_exit() | 908 | * because of cgroup mode, must be called before cgroup_exit() |
909 | */ | 909 | */ |
910 | perf_event_exit_task(tsk); | 910 | perf_event_exit_task(tsk); |
911 | 911 | ||
912 | cgroup_exit(tsk, 1); | 912 | cgroup_exit(tsk, 1); |
913 | 913 | ||
914 | if (group_dead) | 914 | if (group_dead) |
915 | disassociate_ctty(1); | 915 | disassociate_ctty(1); |
916 | 916 | ||
917 | module_put(task_thread_info(tsk)->exec_domain->module); | 917 | module_put(task_thread_info(tsk)->exec_domain->module); |
918 | 918 | ||
919 | proc_exit_connector(tsk); | 919 | proc_exit_connector(tsk); |
920 | 920 | ||
921 | /* | 921 | /* |
922 | * FIXME: do that only when needed, using sched_exit tracepoint | 922 | * FIXME: do that only when needed, using sched_exit tracepoint |
923 | */ | 923 | */ |
924 | ptrace_put_breakpoints(tsk); | 924 | ptrace_put_breakpoints(tsk); |
925 | 925 | ||
926 | exit_notify(tsk, group_dead); | 926 | exit_notify(tsk, group_dead); |
927 | #ifdef CONFIG_NUMA | 927 | #ifdef CONFIG_NUMA |
928 | task_lock(tsk); | 928 | task_lock(tsk); |
929 | mpol_put(tsk->mempolicy); | 929 | mpol_put(tsk->mempolicy); |
930 | tsk->mempolicy = NULL; | 930 | tsk->mempolicy = NULL; |
931 | task_unlock(tsk); | 931 | task_unlock(tsk); |
932 | #endif | 932 | #endif |
933 | #ifdef CONFIG_FUTEX | 933 | #ifdef CONFIG_FUTEX |
934 | if (unlikely(current->pi_state_cache)) | 934 | if (unlikely(current->pi_state_cache)) |
935 | kfree(current->pi_state_cache); | 935 | kfree(current->pi_state_cache); |
936 | #endif | 936 | #endif |
937 | /* | 937 | /* |
938 | * Make sure we are holding no locks: | 938 | * Make sure we are holding no locks: |
939 | */ | 939 | */ |
940 | debug_check_no_locks_held(tsk); | 940 | debug_check_no_locks_held(tsk); |
941 | /* | 941 | /* |
942 | * We can do this unlocked here. The futex code uses this flag | 942 | * We can do this unlocked here. The futex code uses this flag |
943 | * just to verify whether the pi state cleanup has been done | 943 | * just to verify whether the pi state cleanup has been done |
944 | * or not. In the worst case it loops once more. | 944 | * or not. In the worst case it loops once more. |
945 | */ | 945 | */ |
946 | tsk->flags |= PF_EXITPIDONE; | 946 | tsk->flags |= PF_EXITPIDONE; |
947 | 947 | ||
948 | if (tsk->io_context) | 948 | if (tsk->io_context) |
949 | exit_io_context(tsk); | 949 | exit_io_context(tsk); |
950 | 950 | ||
951 | if (tsk->splice_pipe) | 951 | if (tsk->splice_pipe) |
952 | __free_pipe_info(tsk->splice_pipe); | 952 | __free_pipe_info(tsk->splice_pipe); |
953 | 953 | ||
954 | if (tsk->task_frag.page) | 954 | if (tsk->task_frag.page) |
955 | put_page(tsk->task_frag.page); | 955 | put_page(tsk->task_frag.page); |
956 | 956 | ||
957 | validate_creds_for_do_exit(tsk); | 957 | validate_creds_for_do_exit(tsk); |
958 | 958 | ||
959 | preempt_disable(); | 959 | preempt_disable(); |
960 | if (tsk->nr_dirtied) | 960 | if (tsk->nr_dirtied) |
961 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); | 961 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); |
962 | exit_rcu(); | 962 | exit_rcu(); |
963 | 963 | ||
964 | /* | 964 | /* |
965 | * The setting of TASK_RUNNING by try_to_wake_up() may be delayed | 965 | * The setting of TASK_RUNNING by try_to_wake_up() may be delayed |
966 | * when the following two conditions become true. | 966 | * when the following two conditions become true. |
967 | * - There is a race condition on mmap_sem (it is acquired by | 967 | * - There is a race condition on mmap_sem (it is acquired by |
968 | * exit_mm()), and | 968 | * exit_mm()), and |
969 | * - SMI occurs before setting TASK_RUNNING. | 969 | * - SMI occurs before setting TASK_RUNNING. |
970 | * (or hypervisor of virtual machine switches to other guest) | 970 | * (or hypervisor of virtual machine switches to other guest) |
971 | * As a result, we may become TASK_RUNNING after becoming TASK_DEAD | 971 | * As a result, we may become TASK_RUNNING after becoming TASK_DEAD |
972 | * | 972 | * |
973 | * To avoid it, we have to wait for releasing tsk->pi_lock which | 973 | * To avoid it, we have to wait for releasing tsk->pi_lock which |
974 | * is held by try_to_wake_up() | 974 | * is held by try_to_wake_up() |
975 | */ | 975 | */ |
976 | smp_mb(); | 976 | smp_mb(); |
977 | raw_spin_unlock_wait(&tsk->pi_lock); | 977 | raw_spin_unlock_wait(&tsk->pi_lock); |
978 | 978 | ||
979 | /* causes final put_task_struct in finish_task_switch(). */ | 979 | /* causes final put_task_struct in finish_task_switch(). */ |
980 | tsk->state = TASK_DEAD; | 980 | tsk->state = TASK_DEAD; |
981 | tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ | 981 | tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ |
982 | schedule(); | 982 | schedule(); |
983 | BUG(); | 983 | BUG(); |
984 | /* Avoid "noreturn function does return". */ | 984 | /* Avoid "noreturn function does return". */ |
985 | for (;;) | 985 | for (;;) |
986 | cpu_relax(); /* For when BUG is null */ | 986 | cpu_relax(); /* For when BUG is null */ |
987 | } | 987 | } |
988 | 988 | ||
989 | EXPORT_SYMBOL_GPL(do_exit); | 989 | EXPORT_SYMBOL_GPL(do_exit); |
990 | 990 | ||
991 | void complete_and_exit(struct completion *comp, long code) | 991 | void complete_and_exit(struct completion *comp, long code) |
992 | { | 992 | { |
993 | if (comp) | 993 | if (comp) |
994 | complete(comp); | 994 | complete(comp); |
995 | 995 | ||
996 | do_exit(code); | 996 | do_exit(code); |
997 | } | 997 | } |
998 | 998 | ||
999 | EXPORT_SYMBOL(complete_and_exit); | 999 | EXPORT_SYMBOL(complete_and_exit); |
1000 | 1000 | ||
1001 | SYSCALL_DEFINE1(exit, int, error_code) | 1001 | SYSCALL_DEFINE1(exit, int, error_code) |
1002 | { | 1002 | { |
1003 | do_exit((error_code&0xff)<<8); | 1003 | do_exit((error_code&0xff)<<8); |
1004 | } | 1004 | } |
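The (error_code & 0xff) << 8 packing in sys_exit() is exactly what the WIFEXITED()/WEXITSTATUS() macros undo on the other side. A quick round-trip check in plain user-space C (nothing here is kernel API):

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();
	if (pid == 0)
		_exit(42);			/* the kernel stores (42 & 0xff) << 8 */

	int status;
	waitpid(pid, &status, 0);
	if (WIFEXITED(status))			/* low 7 bits zero: normal exit */
		printf("exit status: %d\n", WEXITSTATUS(status));	/* prints 42 */
	else if (WIFSIGNALED(status))
		printf("killed by signal %d\n", WTERMSIG(status));
	return 0;
}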
1005 | 1005 | ||
1006 | /* | 1006 | /* |
1007 | * Take down every thread in the group. This is called by fatal signals | 1007 | * Take down every thread in the group. This is called by fatal signals |
1008 | * as well as by sys_exit_group (below). | 1008 | * as well as by sys_exit_group (below). |
1009 | */ | 1009 | */ |
1010 | void | 1010 | void |
1011 | do_group_exit(int exit_code) | 1011 | do_group_exit(int exit_code) |
1012 | { | 1012 | { |
1013 | struct signal_struct *sig = current->signal; | 1013 | struct signal_struct *sig = current->signal; |
1014 | 1014 | ||
1015 | BUG_ON(exit_code & 0x80); /* core dumps don't get here */ | 1015 | BUG_ON(exit_code & 0x80); /* core dumps don't get here */ |
1016 | 1016 | ||
1017 | if (signal_group_exit(sig)) | 1017 | if (signal_group_exit(sig)) |
1018 | exit_code = sig->group_exit_code; | 1018 | exit_code = sig->group_exit_code; |
1019 | else if (!thread_group_empty(current)) { | 1019 | else if (!thread_group_empty(current)) { |
1020 | struct sighand_struct *const sighand = current->sighand; | 1020 | struct sighand_struct *const sighand = current->sighand; |
1021 | spin_lock_irq(&sighand->siglock); | 1021 | spin_lock_irq(&sighand->siglock); |
1022 | if (signal_group_exit(sig)) | 1022 | if (signal_group_exit(sig)) |
1023 | /* Another thread got here before we took the lock. */ | 1023 | /* Another thread got here before we took the lock. */ |
1024 | exit_code = sig->group_exit_code; | 1024 | exit_code = sig->group_exit_code; |
1025 | else { | 1025 | else { |
1026 | sig->group_exit_code = exit_code; | 1026 | sig->group_exit_code = exit_code; |
1027 | sig->flags = SIGNAL_GROUP_EXIT; | 1027 | sig->flags = SIGNAL_GROUP_EXIT; |
1028 | zap_other_threads(current); | 1028 | zap_other_threads(current); |
1029 | } | 1029 | } |
1030 | spin_unlock_irq(&sighand->siglock); | 1030 | spin_unlock_irq(&sighand->siglock); |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | do_exit(exit_code); | 1033 | do_exit(exit_code); |
1034 | /* NOTREACHED */ | 1034 | /* NOTREACHED */ |
1035 | } | 1035 | } |
1036 | 1036 | ||
1037 | /* | 1037 | /* |
1038 | * this kills every thread in the thread group. Note that any externally | 1038 | * this kills every thread in the thread group. Note that any externally |
1039 | * wait4()-ing process will get the correct exit code - even if this | 1039 | * wait4()-ing process will get the correct exit code - even if this |
1040 | * thread is not the thread group leader. | 1040 | * thread is not the thread group leader. |
1041 | */ | 1041 | */ |
1042 | SYSCALL_DEFINE1(exit_group, int, error_code) | 1042 | SYSCALL_DEFINE1(exit_group, int, error_code) |
1043 | { | 1043 | { |
1044 | do_group_exit((error_code & 0xff) << 8); | 1044 | do_group_exit((error_code & 0xff) << 8); |
1045 | /* NOTREACHED */ | 1045 | /* NOTREACHED */ |
1046 | return 0; | 1046 | return 0; |
1047 | } | 1047 | } |
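do_group_exit() is also why calling exit() from any thread (glibc routes it to exit_group) tears down the whole process, not just the caller. A short pthread demonstration (build with -pthread):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>

static void *worker(void *arg)
{
	sleep(1);
	exit(3);		/* exit_group: every thread in the group is taken down */
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);
	pause();		/* never resumes; the worker ends the process with status 3 */
	puts("not reached");
	return 0;
}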
1048 | 1048 | ||
1049 | struct wait_opts { | 1049 | struct wait_opts { |
1050 | enum pid_type wo_type; | 1050 | enum pid_type wo_type; |
1051 | int wo_flags; | 1051 | int wo_flags; |
1052 | struct pid *wo_pid; | 1052 | struct pid *wo_pid; |
1053 | 1053 | ||
1054 | struct siginfo __user *wo_info; | 1054 | struct siginfo __user *wo_info; |
1055 | int __user *wo_stat; | 1055 | int __user *wo_stat; |
1056 | struct rusage __user *wo_rusage; | 1056 | struct rusage __user *wo_rusage; |
1057 | 1057 | ||
1058 | wait_queue_t child_wait; | 1058 | wait_queue_t child_wait; |
1059 | int notask_error; | 1059 | int notask_error; |
1060 | }; | 1060 | }; |
1061 | 1061 | ||
1062 | static inline | 1062 | static inline |
1063 | struct pid *task_pid_type(struct task_struct *task, enum pid_type type) | 1063 | struct pid *task_pid_type(struct task_struct *task, enum pid_type type) |
1064 | { | 1064 | { |
1065 | if (type != PIDTYPE_PID) | 1065 | if (type != PIDTYPE_PID) |
1066 | task = task->group_leader; | 1066 | task = task->group_leader; |
1067 | return task->pids[type].pid; | 1067 | return task->pids[type].pid; |
1068 | } | 1068 | } |
1069 | 1069 | ||
1070 | static int eligible_pid(struct wait_opts *wo, struct task_struct *p) | 1070 | static int eligible_pid(struct wait_opts *wo, struct task_struct *p) |
1071 | { | 1071 | { |
1072 | return wo->wo_type == PIDTYPE_MAX || | 1072 | return wo->wo_type == PIDTYPE_MAX || |
1073 | task_pid_type(p, wo->wo_type) == wo->wo_pid; | 1073 | task_pid_type(p, wo->wo_type) == wo->wo_pid; |
1074 | } | 1074 | } |
1075 | 1075 | ||
1076 | static int eligible_child(struct wait_opts *wo, struct task_struct *p) | 1076 | static int eligible_child(struct wait_opts *wo, struct task_struct *p) |
1077 | { | 1077 | { |
1078 | if (!eligible_pid(wo, p)) | 1078 | if (!eligible_pid(wo, p)) |
1079 | return 0; | 1079 | return 0; |
1080 | /* Wait for all children (clone and not) if __WALL is set; | 1080 | /* Wait for all children (clone and not) if __WALL is set; |
1081 | * otherwise, wait for clone children *only* if __WCLONE is | 1081 | * otherwise, wait for clone children *only* if __WCLONE is |
1082 | * set; otherwise, wait for non-clone children *only*. (Note: | 1082 | * set; otherwise, wait for non-clone children *only*. (Note: |
1083 | * A "clone" child here is one that reports to its parent | 1083 | * A "clone" child here is one that reports to its parent |
1084 | * using a signal other than SIGCHLD.) */ | 1084 | * using a signal other than SIGCHLD.) */ |
1085 | if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) | 1085 | if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) |
1086 | && !(wo->wo_flags & __WALL)) | 1086 | && !(wo->wo_flags & __WALL)) |
1087 | return 0; | 1087 | return 0; |
1088 | 1088 | ||
1089 | return 1; | 1089 | return 1; |
1090 | } | 1090 | } |
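The XOR in eligible_child() is compact; unrolled, the clone-child filter reads as below. This is only a restatement for readability, with __WCLONE and __WALL taken from the uapi wait header:

#include <stdio.h>
#include <signal.h>
#include <linux/wait.h>		/* __WCLONE, __WALL */

/* Accept a child iff the wait*() flags and the child's exit_signal agree. */
static int wanted_child(int exit_signal, unsigned int wo_flags)
{
	if (wo_flags & __WALL)
		return 1;				/* wait for every child */
	if (wo_flags & __WCLONE)
		return exit_signal != SIGCHLD;		/* clone children only */
	return exit_signal == SIGCHLD;			/* ordinary children only */
}

int main(void)
{
	printf("%d %d %d\n",
	       wanted_child(SIGCHLD, 0),		/* 1 */
	       wanted_child(SIGUSR1, 0),		/* 0 */
	       wanted_child(SIGUSR1, __WCLONE));	/* 1 */
	return 0;
}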
1091 | 1091 | ||
1092 | static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, | 1092 | static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, |
1093 | pid_t pid, uid_t uid, int why, int status) | 1093 | pid_t pid, uid_t uid, int why, int status) |
1094 | { | 1094 | { |
1095 | struct siginfo __user *infop; | 1095 | struct siginfo __user *infop; |
1096 | int retval = wo->wo_rusage | 1096 | int retval = wo->wo_rusage |
1097 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | 1097 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1098 | 1098 | ||
1099 | put_task_struct(p); | 1099 | put_task_struct(p); |
1100 | infop = wo->wo_info; | 1100 | infop = wo->wo_info; |
1101 | if (infop) { | 1101 | if (infop) { |
1102 | if (!retval) | 1102 | if (!retval) |
1103 | retval = put_user(SIGCHLD, &infop->si_signo); | 1103 | retval = put_user(SIGCHLD, &infop->si_signo); |
1104 | if (!retval) | 1104 | if (!retval) |
1105 | retval = put_user(0, &infop->si_errno); | 1105 | retval = put_user(0, &infop->si_errno); |
1106 | if (!retval) | 1106 | if (!retval) |
1107 | retval = put_user((short)why, &infop->si_code); | 1107 | retval = put_user((short)why, &infop->si_code); |
1108 | if (!retval) | 1108 | if (!retval) |
1109 | retval = put_user(pid, &infop->si_pid); | 1109 | retval = put_user(pid, &infop->si_pid); |
1110 | if (!retval) | 1110 | if (!retval) |
1111 | retval = put_user(uid, &infop->si_uid); | 1111 | retval = put_user(uid, &infop->si_uid); |
1112 | if (!retval) | 1112 | if (!retval) |
1113 | retval = put_user(status, &infop->si_status); | 1113 | retval = put_user(status, &infop->si_status); |
1114 | } | 1114 | } |
1115 | if (!retval) | 1115 | if (!retval) |
1116 | retval = pid; | 1116 | retval = pid; |
1117 | return retval; | 1117 | return retval; |
1118 | } | 1118 | } |
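wait_noreap_copyout() is the path behind waitid(..., WNOWAIT): the siginfo is filled in but the child is left as a zombie, so it can be waited on again. A user-space sketch of that behaviour:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid = fork();
	if (pid == 0)
		_exit(7);

	siginfo_t info;
	waitid(P_PID, pid, &info, WEXITED | WNOWAIT);	/* peek, do not reap */
	printf("peek:   pid=%d exited=%d status=%d\n",
	       info.si_pid, info.si_code == CLD_EXITED, info.si_status);

	waitid(P_PID, pid, &info, WEXITED);		/* still a zombie: reap it now */
	printf("reaped: status=%d\n", info.si_status);
	return 0;
}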
1119 | 1119 | ||
1120 | /* | 1120 | /* |
1121 | * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold | 1121 | * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold |
1122 | * read_lock(&tasklist_lock) on entry. If we return zero, we still hold | 1122 | * read_lock(&tasklist_lock) on entry. If we return zero, we still hold |
1123 | * the lock and this task is uninteresting. If we return nonzero, we have | 1123 | * the lock and this task is uninteresting. If we return nonzero, we have |
1124 | * released the lock and the system call should return. | 1124 | * released the lock and the system call should return. |
1125 | */ | 1125 | */ |
1126 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | 1126 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) |
1127 | { | 1127 | { |
1128 | unsigned long state; | 1128 | unsigned long state; |
1129 | int retval, status, traced; | 1129 | int retval, status, traced; |
1130 | pid_t pid = task_pid_vnr(p); | 1130 | pid_t pid = task_pid_vnr(p); |
1131 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); | 1131 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); |
1132 | struct siginfo __user *infop; | 1132 | struct siginfo __user *infop; |
1133 | 1133 | ||
1134 | if (!likely(wo->wo_flags & WEXITED)) | 1134 | if (!likely(wo->wo_flags & WEXITED)) |
1135 | return 0; | 1135 | return 0; |
1136 | 1136 | ||
1137 | if (unlikely(wo->wo_flags & WNOWAIT)) { | 1137 | if (unlikely(wo->wo_flags & WNOWAIT)) { |
1138 | int exit_code = p->exit_code; | 1138 | int exit_code = p->exit_code; |
1139 | int why; | 1139 | int why; |
1140 | 1140 | ||
1141 | get_task_struct(p); | 1141 | get_task_struct(p); |
1142 | read_unlock(&tasklist_lock); | 1142 | read_unlock(&tasklist_lock); |
1143 | if ((exit_code & 0x7f) == 0) { | 1143 | if ((exit_code & 0x7f) == 0) { |
1144 | why = CLD_EXITED; | 1144 | why = CLD_EXITED; |
1145 | status = exit_code >> 8; | 1145 | status = exit_code >> 8; |
1146 | } else { | 1146 | } else { |
1147 | why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; | 1147 | why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; |
1148 | status = exit_code & 0x7f; | 1148 | status = exit_code & 0x7f; |
1149 | } | 1149 | } |
1150 | return wait_noreap_copyout(wo, p, pid, uid, why, status); | 1150 | return wait_noreap_copyout(wo, p, pid, uid, why, status); |
1151 | } | 1151 | } |
1152 | 1152 | ||
1153 | /* | 1153 | /* |
1154 | * Try to move the task's state to DEAD | 1154 | * Try to move the task's state to DEAD |
1155 | * only one thread is allowed to do this: | 1155 | * only one thread is allowed to do this: |
1156 | */ | 1156 | */ |
1157 | state = xchg(&p->exit_state, EXIT_DEAD); | 1157 | state = xchg(&p->exit_state, EXIT_DEAD); |
1158 | if (state != EXIT_ZOMBIE) { | 1158 | if (state != EXIT_ZOMBIE) { |
1159 | BUG_ON(state != EXIT_DEAD); | 1159 | BUG_ON(state != EXIT_DEAD); |
1160 | return 0; | 1160 | return 0; |
1161 | } | 1161 | } |
1162 | 1162 | ||
1163 | traced = ptrace_reparented(p); | 1163 | traced = ptrace_reparented(p); |
1164 | /* | 1164 | /* |
1165 | * It can be ptraced but not reparented, check | 1165 | * It can be ptraced but not reparented, check |
1166 | * thread_group_leader() to filter out sub-threads. | 1166 | * thread_group_leader() to filter out sub-threads. |
1167 | */ | 1167 | */ |
1168 | if (likely(!traced) && thread_group_leader(p)) { | 1168 | if (likely(!traced) && thread_group_leader(p)) { |
1169 | struct signal_struct *psig; | 1169 | struct signal_struct *psig; |
1170 | struct signal_struct *sig; | 1170 | struct signal_struct *sig; |
1171 | unsigned long maxrss; | 1171 | unsigned long maxrss; |
1172 | cputime_t tgutime, tgstime; | 1172 | cputime_t tgutime, tgstime; |
1173 | 1173 | ||
1174 | /* | 1174 | /* |
1175 | * The resource counters for the group leader are in its | 1175 | * The resource counters for the group leader are in its |
1176 | * own task_struct. Those for dead threads in the group | 1176 | * own task_struct. Those for dead threads in the group |
1177 | * are in its signal_struct, as are those for the child | 1177 | * are in its signal_struct, as are those for the child |
1178 | * processes it has previously reaped. All these | 1178 | * processes it has previously reaped. All these |
1179 | * accumulate in the parent's signal_struct c* fields. | 1179 | * accumulate in the parent's signal_struct c* fields. |
1180 | * | 1180 | * |
1181 | * We don't bother to take a lock here to protect these | 1181 | * We don't bother to take a lock here to protect these |
1182 | * p->signal fields, because they are only touched by | 1182 | * p->signal fields, because they are only touched by |
1183 | * __exit_signal, which runs with tasklist_lock | 1183 | * __exit_signal, which runs with tasklist_lock |
1184 | * write-locked anyway, and so is excluded here. We do | 1184 | * write-locked anyway, and so is excluded here. We do |
1185 | * need to protect the access to parent->signal fields, | 1185 | * need to protect the access to parent->signal fields, |
1186 | * as other threads in the parent group can be right | 1186 | * as other threads in the parent group can be right |
1187 | * here reaping other children at the same time. | 1187 | * here reaping other children at the same time. |
1188 | * | 1188 | * |
1189 | * We use thread_group_times() to get times for the thread | 1189 | * We use thread_group_cputime_adjusted() to get times for the thread |
1190 | * group, which consolidates times for all threads in the | 1190 | * group, which consolidates times for all threads in the |
1191 | * group including the group leader. | 1191 | * group including the group leader. |
1192 | */ | 1192 | */ |
1193 | thread_group_times(p, &tgutime, &tgstime); | 1193 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); |
1194 | spin_lock_irq(&p->real_parent->sighand->siglock); | 1194 | spin_lock_irq(&p->real_parent->sighand->siglock); |
1195 | psig = p->real_parent->signal; | 1195 | psig = p->real_parent->signal; |
1196 | sig = p->signal; | 1196 | sig = p->signal; |
1197 | psig->cutime += tgutime + sig->cutime; | 1197 | psig->cutime += tgutime + sig->cutime; |
1198 | psig->cstime += tgstime + sig->cstime; | 1198 | psig->cstime += tgstime + sig->cstime; |
1199 | psig->cgtime += p->gtime + sig->gtime + sig->cgtime; | 1199 | psig->cgtime += p->gtime + sig->gtime + sig->cgtime; |
1200 | psig->cmin_flt += | 1200 | psig->cmin_flt += |
1201 | p->min_flt + sig->min_flt + sig->cmin_flt; | 1201 | p->min_flt + sig->min_flt + sig->cmin_flt; |
1202 | psig->cmaj_flt += | 1202 | psig->cmaj_flt += |
1203 | p->maj_flt + sig->maj_flt + sig->cmaj_flt; | 1203 | p->maj_flt + sig->maj_flt + sig->cmaj_flt; |
1204 | psig->cnvcsw += | 1204 | psig->cnvcsw += |
1205 | p->nvcsw + sig->nvcsw + sig->cnvcsw; | 1205 | p->nvcsw + sig->nvcsw + sig->cnvcsw; |
1206 | psig->cnivcsw += | 1206 | psig->cnivcsw += |
1207 | p->nivcsw + sig->nivcsw + sig->cnivcsw; | 1207 | p->nivcsw + sig->nivcsw + sig->cnivcsw; |
1208 | psig->cinblock += | 1208 | psig->cinblock += |
1209 | task_io_get_inblock(p) + | 1209 | task_io_get_inblock(p) + |
1210 | sig->inblock + sig->cinblock; | 1210 | sig->inblock + sig->cinblock; |
1211 | psig->coublock += | 1211 | psig->coublock += |
1212 | task_io_get_oublock(p) + | 1212 | task_io_get_oublock(p) + |
1213 | sig->oublock + sig->coublock; | 1213 | sig->oublock + sig->coublock; |
1214 | maxrss = max(sig->maxrss, sig->cmaxrss); | 1214 | maxrss = max(sig->maxrss, sig->cmaxrss); |
1215 | if (psig->cmaxrss < maxrss) | 1215 | if (psig->cmaxrss < maxrss) |
1216 | psig->cmaxrss = maxrss; | 1216 | psig->cmaxrss = maxrss; |
1217 | task_io_accounting_add(&psig->ioac, &p->ioac); | 1217 | task_io_accounting_add(&psig->ioac, &p->ioac); |
1218 | task_io_accounting_add(&psig->ioac, &sig->ioac); | 1218 | task_io_accounting_add(&psig->ioac, &sig->ioac); |
1219 | spin_unlock_irq(&p->real_parent->sighand->siglock); | 1219 | spin_unlock_irq(&p->real_parent->sighand->siglock); |
1220 | } | 1220 | } |
1221 | 1221 | ||
1222 | /* | 1222 | /* |
1223 | * Now we are sure this task is interesting, and no other | 1223 | * Now we are sure this task is interesting, and no other |
1224 | * thread can reap it because we set its state to EXIT_DEAD. | 1224 | * thread can reap it because we set its state to EXIT_DEAD. |
1225 | */ | 1225 | */ |
1226 | read_unlock(&tasklist_lock); | 1226 | read_unlock(&tasklist_lock); |
1227 | 1227 | ||
1228 | retval = wo->wo_rusage | 1228 | retval = wo->wo_rusage |
1229 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | 1229 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1230 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) | 1230 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) |
1231 | ? p->signal->group_exit_code : p->exit_code; | 1231 | ? p->signal->group_exit_code : p->exit_code; |
1232 | if (!retval && wo->wo_stat) | 1232 | if (!retval && wo->wo_stat) |
1233 | retval = put_user(status, wo->wo_stat); | 1233 | retval = put_user(status, wo->wo_stat); |
1234 | 1234 | ||
1235 | infop = wo->wo_info; | 1235 | infop = wo->wo_info; |
1236 | if (!retval && infop) | 1236 | if (!retval && infop) |
1237 | retval = put_user(SIGCHLD, &infop->si_signo); | 1237 | retval = put_user(SIGCHLD, &infop->si_signo); |
1238 | if (!retval && infop) | 1238 | if (!retval && infop) |
1239 | retval = put_user(0, &infop->si_errno); | 1239 | retval = put_user(0, &infop->si_errno); |
1240 | if (!retval && infop) { | 1240 | if (!retval && infop) { |
1241 | int why; | 1241 | int why; |
1242 | 1242 | ||
1243 | if ((status & 0x7f) == 0) { | 1243 | if ((status & 0x7f) == 0) { |
1244 | why = CLD_EXITED; | 1244 | why = CLD_EXITED; |
1245 | status >>= 8; | 1245 | status >>= 8; |
1246 | } else { | 1246 | } else { |
1247 | why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; | 1247 | why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; |
1248 | status &= 0x7f; | 1248 | status &= 0x7f; |
1249 | } | 1249 | } |
1250 | retval = put_user((short)why, &infop->si_code); | 1250 | retval = put_user((short)why, &infop->si_code); |
1251 | if (!retval) | 1251 | if (!retval) |
1252 | retval = put_user(status, &infop->si_status); | 1252 | retval = put_user(status, &infop->si_status); |
1253 | } | 1253 | } |
1254 | if (!retval && infop) | 1254 | if (!retval && infop) |
1255 | retval = put_user(pid, &infop->si_pid); | 1255 | retval = put_user(pid, &infop->si_pid); |
1256 | if (!retval && infop) | 1256 | if (!retval && infop) |
1257 | retval = put_user(uid, &infop->si_uid); | 1257 | retval = put_user(uid, &infop->si_uid); |
1258 | if (!retval) | 1258 | if (!retval) |
1259 | retval = pid; | 1259 | retval = pid; |
1260 | 1260 | ||
1261 | if (traced) { | 1261 | if (traced) { |
1262 | write_lock_irq(&tasklist_lock); | 1262 | write_lock_irq(&tasklist_lock); |
1263 | /* We dropped tasklist, ptracer could die and untrace */ | 1263 | /* We dropped tasklist, ptracer could die and untrace */ |
1264 | ptrace_unlink(p); | 1264 | ptrace_unlink(p); |
1265 | /* | 1265 | /* |
1266 | * If this is not a sub-thread, notify the parent. | 1266 | * If this is not a sub-thread, notify the parent. |
1267 | * If parent wants a zombie, don't release it now. | 1267 | * If parent wants a zombie, don't release it now. |
1268 | */ | 1268 | */ |
1269 | if (thread_group_leader(p) && | 1269 | if (thread_group_leader(p) && |
1270 | !do_notify_parent(p, p->exit_signal)) { | 1270 | !do_notify_parent(p, p->exit_signal)) { |
1271 | p->exit_state = EXIT_ZOMBIE; | 1271 | p->exit_state = EXIT_ZOMBIE; |
1272 | p = NULL; | 1272 | p = NULL; |
1273 | } | 1273 | } |
1274 | write_unlock_irq(&tasklist_lock); | 1274 | write_unlock_irq(&tasklist_lock); |
1275 | } | 1275 | } |
1276 | if (p != NULL) | 1276 | if (p != NULL) |
1277 | release_task(p); | 1277 | release_task(p); |
1278 | 1278 | ||
1279 | return retval; | 1279 | return retval; |
1280 | } | 1280 | } |
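Besides reporting the exit code, the reaping path above folds the dead child's accounting into the parent's signal_struct; the per-child figures reach user space through wait4()'s rusage argument. A minimal sketch:

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/resource.h>

int main(void)
{
	pid_t pid = fork();
	if (pid == 0) {
		volatile unsigned long i, x = 0;	/* burn a little CPU time */
		for (i = 0; i < 100000000UL; i++)
			x += i;
		_exit(0);
	}

	int status;
	struct rusage ru;
	wait4(pid, &status, 0, &ru);	/* the same data getrusage(RUSAGE_CHILDREN) accumulates */
	printf("child utime: %ld.%06ld s, maxrss: %ld kB\n",
	       (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec, ru.ru_maxrss);
	return 0;
}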
1281 | 1281 | ||
1282 | static int *task_stopped_code(struct task_struct *p, bool ptrace) | 1282 | static int *task_stopped_code(struct task_struct *p, bool ptrace) |
1283 | { | 1283 | { |
1284 | if (ptrace) { | 1284 | if (ptrace) { |
1285 | if (task_is_stopped_or_traced(p) && | 1285 | if (task_is_stopped_or_traced(p) && |
1286 | !(p->jobctl & JOBCTL_LISTENING)) | 1286 | !(p->jobctl & JOBCTL_LISTENING)) |
1287 | return &p->exit_code; | 1287 | return &p->exit_code; |
1288 | } else { | 1288 | } else { |
1289 | if (p->signal->flags & SIGNAL_STOP_STOPPED) | 1289 | if (p->signal->flags & SIGNAL_STOP_STOPPED) |
1290 | return &p->signal->group_exit_code; | 1290 | return &p->signal->group_exit_code; |
1291 | } | 1291 | } |
1292 | return NULL; | 1292 | return NULL; |
1293 | } | 1293 | } |
1294 | 1294 | ||
1295 | /** | 1295 | /** |
1296 | * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED | 1296 | * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED |
1297 | * @wo: wait options | 1297 | * @wo: wait options |
1298 | * @ptrace: is the wait for ptrace | 1298 | * @ptrace: is the wait for ptrace |
1299 | * @p: task to wait for | 1299 | * @p: task to wait for |
1300 | * | 1300 | * |
1301 | * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. | 1301 | * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. |
1302 | * | 1302 | * |
1303 | * CONTEXT: | 1303 | * CONTEXT: |
1304 | * read_lock(&tasklist_lock), which is released if return value is | 1304 | * read_lock(&tasklist_lock), which is released if return value is |
1305 | * non-zero. Also, grabs and releases @p->sighand->siglock. | 1305 | * non-zero. Also, grabs and releases @p->sighand->siglock. |
1306 | * | 1306 | * |
1307 | * RETURNS: | 1307 | * RETURNS: |
1308 | * 0 if wait condition didn't exist and search for other wait conditions | 1308 | * 0 if wait condition didn't exist and search for other wait conditions |
1309 | * should continue. Non-zero return, -errno on failure and @p's pid on | 1309 | * should continue. Non-zero return, -errno on failure and @p's pid on |
1310 | * success, implies that tasklist_lock is released and wait condition | 1310 | * success, implies that tasklist_lock is released and wait condition |
1311 | * search should terminate. | 1311 | * search should terminate. |
1312 | */ | 1312 | */ |
1313 | static int wait_task_stopped(struct wait_opts *wo, | 1313 | static int wait_task_stopped(struct wait_opts *wo, |
1314 | int ptrace, struct task_struct *p) | 1314 | int ptrace, struct task_struct *p) |
1315 | { | 1315 | { |
1316 | struct siginfo __user *infop; | 1316 | struct siginfo __user *infop; |
1317 | int retval, exit_code, *p_code, why; | 1317 | int retval, exit_code, *p_code, why; |
1318 | uid_t uid = 0; /* unneeded, required by compiler */ | 1318 | uid_t uid = 0; /* unneeded, required by compiler */ |
1319 | pid_t pid; | 1319 | pid_t pid; |
1320 | 1320 | ||
1321 | /* | 1321 | /* |
1322 | * Traditionally we see ptrace'd stopped tasks regardless of options. | 1322 | * Traditionally we see ptrace'd stopped tasks regardless of options. |
1323 | */ | 1323 | */ |
1324 | if (!ptrace && !(wo->wo_flags & WUNTRACED)) | 1324 | if (!ptrace && !(wo->wo_flags & WUNTRACED)) |
1325 | return 0; | 1325 | return 0; |
1326 | 1326 | ||
1327 | if (!task_stopped_code(p, ptrace)) | 1327 | if (!task_stopped_code(p, ptrace)) |
1328 | return 0; | 1328 | return 0; |
1329 | 1329 | ||
1330 | exit_code = 0; | 1330 | exit_code = 0; |
1331 | spin_lock_irq(&p->sighand->siglock); | 1331 | spin_lock_irq(&p->sighand->siglock); |
1332 | 1332 | ||
1333 | p_code = task_stopped_code(p, ptrace); | 1333 | p_code = task_stopped_code(p, ptrace); |
1334 | if (unlikely(!p_code)) | 1334 | if (unlikely(!p_code)) |
1335 | goto unlock_sig; | 1335 | goto unlock_sig; |
1336 | 1336 | ||
1337 | exit_code = *p_code; | 1337 | exit_code = *p_code; |
1338 | if (!exit_code) | 1338 | if (!exit_code) |
1339 | goto unlock_sig; | 1339 | goto unlock_sig; |
1340 | 1340 | ||
1341 | if (!unlikely(wo->wo_flags & WNOWAIT)) | 1341 | if (!unlikely(wo->wo_flags & WNOWAIT)) |
1342 | *p_code = 0; | 1342 | *p_code = 0; |
1343 | 1343 | ||
1344 | uid = from_kuid_munged(current_user_ns(), task_uid(p)); | 1344 | uid = from_kuid_munged(current_user_ns(), task_uid(p)); |
1345 | unlock_sig: | 1345 | unlock_sig: |
1346 | spin_unlock_irq(&p->sighand->siglock); | 1346 | spin_unlock_irq(&p->sighand->siglock); |
1347 | if (!exit_code) | 1347 | if (!exit_code) |
1348 | return 0; | 1348 | return 0; |
1349 | 1349 | ||
1350 | /* | 1350 | /* |
1351 | * Now we are pretty sure this task is interesting. | 1351 | * Now we are pretty sure this task is interesting. |
1352 | * Make sure it doesn't get reaped out from under us while we | 1352 | * Make sure it doesn't get reaped out from under us while we |
1353 | * give up the lock and then examine it below. We don't want to | 1353 | * give up the lock and then examine it below. We don't want to |
1354 | * keep holding onto the tasklist_lock while we call getrusage and | 1354 | * keep holding onto the tasklist_lock while we call getrusage and |
1355 | * possibly take page faults for user memory. | 1355 | * possibly take page faults for user memory. |
1356 | */ | 1356 | */ |
1357 | get_task_struct(p); | 1357 | get_task_struct(p); |
1358 | pid = task_pid_vnr(p); | 1358 | pid = task_pid_vnr(p); |
1359 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; | 1359 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; |
1360 | read_unlock(&tasklist_lock); | 1360 | read_unlock(&tasklist_lock); |
1361 | 1361 | ||
1362 | if (unlikely(wo->wo_flags & WNOWAIT)) | 1362 | if (unlikely(wo->wo_flags & WNOWAIT)) |
1363 | return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); | 1363 | return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); |
1364 | 1364 | ||
1365 | retval = wo->wo_rusage | 1365 | retval = wo->wo_rusage |
1366 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | 1366 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1367 | if (!retval && wo->wo_stat) | 1367 | if (!retval && wo->wo_stat) |
1368 | retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat); | 1368 | retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat); |
1369 | 1369 | ||
1370 | infop = wo->wo_info; | 1370 | infop = wo->wo_info; |
1371 | if (!retval && infop) | 1371 | if (!retval && infop) |
1372 | retval = put_user(SIGCHLD, &infop->si_signo); | 1372 | retval = put_user(SIGCHLD, &infop->si_signo); |
1373 | if (!retval && infop) | 1373 | if (!retval && infop) |
1374 | retval = put_user(0, &infop->si_errno); | 1374 | retval = put_user(0, &infop->si_errno); |
1375 | if (!retval && infop) | 1375 | if (!retval && infop) |
1376 | retval = put_user((short)why, &infop->si_code); | 1376 | retval = put_user((short)why, &infop->si_code); |
1377 | if (!retval && infop) | 1377 | if (!retval && infop) |
1378 | retval = put_user(exit_code, &infop->si_status); | 1378 | retval = put_user(exit_code, &infop->si_status); |
1379 | if (!retval && infop) | 1379 | if (!retval && infop) |
1380 | retval = put_user(pid, &infop->si_pid); | 1380 | retval = put_user(pid, &infop->si_pid); |
1381 | if (!retval && infop) | 1381 | if (!retval && infop) |
1382 | retval = put_user(uid, &infop->si_uid); | 1382 | retval = put_user(uid, &infop->si_uid); |
1383 | if (!retval) | 1383 | if (!retval) |
1384 | retval = pid; | 1384 | retval = pid; |
1385 | put_task_struct(p); | 1385 | put_task_struct(p); |
1386 | 1386 | ||
1387 | BUG_ON(!retval); | 1387 | BUG_ON(!retval); |
1388 | return retval; | 1388 | return retval; |
1389 | } | 1389 | } |
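wait_task_stopped() is what ultimately services a waitpid(..., WUNTRACED) from userspace (and any ptrace wait on a stopped tracee). A short, hedged sketch of the caller's side; everything here is standard libc, none of it comes from this file:

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		raise(SIGSTOP);				/* child stops itself */
		_exit(0);
	}

	int status;
	waitpid(child, &status, WUNTRACED);		/* returns when the child stops */
	if (WIFSTOPPED(status))
		printf("child stopped by signal %d\n", WSTOPSIG(status));

	kill(child, SIGCONT);				/* let it run again */
	waitpid(child, &status, 0);			/* reap the eventual exit */
	return 0;
}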
1390 | 1390 | ||
1391 | /* | 1391 | /* |
1392 | * Handle do_wait work for one task in a live, non-stopped state. | 1392 | * Handle do_wait work for one task in a live, non-stopped state. |
1393 | * read_lock(&tasklist_lock) on entry. If we return zero, we still hold | 1393 | * read_lock(&tasklist_lock) on entry. If we return zero, we still hold |
1394 | * the lock and this task is uninteresting. If we return nonzero, we have | 1394 | * the lock and this task is uninteresting. If we return nonzero, we have |
1395 | * released the lock and the system call should return. | 1395 | * released the lock and the system call should return. |
1396 | */ | 1396 | */ |
1397 | static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) | 1397 | static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) |
1398 | { | 1398 | { |
1399 | int retval; | 1399 | int retval; |
1400 | pid_t pid; | 1400 | pid_t pid; |
1401 | uid_t uid; | 1401 | uid_t uid; |
1402 | 1402 | ||
1403 | if (!unlikely(wo->wo_flags & WCONTINUED)) | 1403 | if (!unlikely(wo->wo_flags & WCONTINUED)) |
1404 | return 0; | 1404 | return 0; |
1405 | 1405 | ||
1406 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) | 1406 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) |
1407 | return 0; | 1407 | return 0; |
1408 | 1408 | ||
1409 | spin_lock_irq(&p->sighand->siglock); | 1409 | spin_lock_irq(&p->sighand->siglock); |
1410 | /* Re-check with the lock held. */ | 1410 | /* Re-check with the lock held. */ |
1411 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { | 1411 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { |
1412 | spin_unlock_irq(&p->sighand->siglock); | 1412 | spin_unlock_irq(&p->sighand->siglock); |
1413 | return 0; | 1413 | return 0; |
1414 | } | 1414 | } |
1415 | if (!unlikely(wo->wo_flags & WNOWAIT)) | 1415 | if (!unlikely(wo->wo_flags & WNOWAIT)) |
1416 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; | 1416 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; |
1417 | uid = from_kuid_munged(current_user_ns(), task_uid(p)); | 1417 | uid = from_kuid_munged(current_user_ns(), task_uid(p)); |
1418 | spin_unlock_irq(&p->sighand->siglock); | 1418 | spin_unlock_irq(&p->sighand->siglock); |
1419 | 1419 | ||
1420 | pid = task_pid_vnr(p); | 1420 | pid = task_pid_vnr(p); |
1421 | get_task_struct(p); | 1421 | get_task_struct(p); |
1422 | read_unlock(&tasklist_lock); | 1422 | read_unlock(&tasklist_lock); |
1423 | 1423 | ||
1424 | if (!wo->wo_info) { | 1424 | if (!wo->wo_info) { |
1425 | retval = wo->wo_rusage | 1425 | retval = wo->wo_rusage |
1426 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | 1426 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1427 | put_task_struct(p); | 1427 | put_task_struct(p); |
1428 | if (!retval && wo->wo_stat) | 1428 | if (!retval && wo->wo_stat) |
1429 | retval = put_user(0xffff, wo->wo_stat); | 1429 | retval = put_user(0xffff, wo->wo_stat); |
1430 | if (!retval) | 1430 | if (!retval) |
1431 | retval = pid; | 1431 | retval = pid; |
1432 | } else { | 1432 | } else { |
1433 | retval = wait_noreap_copyout(wo, p, pid, uid, | 1433 | retval = wait_noreap_copyout(wo, p, pid, uid, |
1434 | CLD_CONTINUED, SIGCONT); | 1434 | CLD_CONTINUED, SIGCONT); |
1435 | BUG_ON(retval == 0); | 1435 | BUG_ON(retval == 0); |
1436 | } | 1436 | } |
1437 | 1437 | ||
1438 | return retval; | 1438 | return retval; |
1439 | } | 1439 | } |
1440 | 1440 | ||
1441 | /* | 1441 | /* |
1442 | * Consider @p for a wait by @parent. | 1442 | * Consider @p for a wait by @parent. |
1443 | * | 1443 | * |
1444 | * -ECHILD should be in ->notask_error before the first call. | 1444 | * -ECHILD should be in ->notask_error before the first call. |
1445 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. | 1445 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. |
1446 | * Returns zero if the search for a child should continue; | 1446 | * Returns zero if the search for a child should continue; |
1447 | * then ->notask_error is 0 if @p is an eligible child, | 1447 | * then ->notask_error is 0 if @p is an eligible child, |
1448 | * or another error from security_task_wait(), or still -ECHILD. | 1448 | * or another error from security_task_wait(), or still -ECHILD. |
1449 | */ | 1449 | */ |
1450 | static int wait_consider_task(struct wait_opts *wo, int ptrace, | 1450 | static int wait_consider_task(struct wait_opts *wo, int ptrace, |
1451 | struct task_struct *p) | 1451 | struct task_struct *p) |
1452 | { | 1452 | { |
1453 | int ret = eligible_child(wo, p); | 1453 | int ret = eligible_child(wo, p); |
1454 | if (!ret) | 1454 | if (!ret) |
1455 | return ret; | 1455 | return ret; |
1456 | 1456 | ||
1457 | ret = security_task_wait(p); | 1457 | ret = security_task_wait(p); |
1458 | if (unlikely(ret < 0)) { | 1458 | if (unlikely(ret < 0)) { |
1459 | /* | 1459 | /* |
1460 | * If we have not yet seen any eligible child, | 1460 | * If we have not yet seen any eligible child, |
1461 | * then let this error code replace -ECHILD. | 1461 | * then let this error code replace -ECHILD. |
1462 | * A permission error will give the user a clue | 1462 | * A permission error will give the user a clue |
1463 | * to look for security policy problems, rather | 1463 | * to look for security policy problems, rather |
1464 | * than for mysterious wait bugs. | 1464 | * than for mysterious wait bugs. |
1465 | */ | 1465 | */ |
1466 | if (wo->notask_error) | 1466 | if (wo->notask_error) |
1467 | wo->notask_error = ret; | 1467 | wo->notask_error = ret; |
1468 | return 0; | 1468 | return 0; |
1469 | } | 1469 | } |
1470 | 1470 | ||
1471 | /* dead body doesn't have much to contribute */ | 1471 | /* dead body doesn't have much to contribute */ |
1472 | if (unlikely(p->exit_state == EXIT_DEAD)) { | 1472 | if (unlikely(p->exit_state == EXIT_DEAD)) { |
1473 | /* | 1473 | /* |
1474 | * But do not ignore this task until the tracer does | 1474 | * But do not ignore this task until the tracer does |
1475 | * wait_task_zombie()->do_notify_parent(). | 1475 | * wait_task_zombie()->do_notify_parent(). |
1476 | */ | 1476 | */ |
1477 | if (likely(!ptrace) && unlikely(ptrace_reparented(p))) | 1477 | if (likely(!ptrace) && unlikely(ptrace_reparented(p))) |
1478 | wo->notask_error = 0; | 1478 | wo->notask_error = 0; |
1479 | return 0; | 1479 | return 0; |
1480 | } | 1480 | } |
1481 | 1481 | ||
1482 | /* slay zombie? */ | 1482 | /* slay zombie? */ |
1483 | if (p->exit_state == EXIT_ZOMBIE) { | 1483 | if (p->exit_state == EXIT_ZOMBIE) { |
1484 | /* | 1484 | /* |
1485 | * A zombie ptracee is only visible to its ptracer. | 1485 | * A zombie ptracee is only visible to its ptracer. |
1486 | * Notification and reaping will be cascaded to the real | 1486 | * Notification and reaping will be cascaded to the real |
1487 | * parent when the ptracer detaches. | 1487 | * parent when the ptracer detaches. |
1488 | */ | 1488 | */ |
1489 | if (likely(!ptrace) && unlikely(p->ptrace)) { | 1489 | if (likely(!ptrace) && unlikely(p->ptrace)) { |
1490 | /* it will become visible, clear notask_error */ | 1490 | /* it will become visible, clear notask_error */ |
1491 | wo->notask_error = 0; | 1491 | wo->notask_error = 0; |
1492 | return 0; | 1492 | return 0; |
1493 | } | 1493 | } |
1494 | 1494 | ||
1495 | /* we don't reap group leaders with subthreads */ | 1495 | /* we don't reap group leaders with subthreads */ |
1496 | if (!delay_group_leader(p)) | 1496 | if (!delay_group_leader(p)) |
1497 | return wait_task_zombie(wo, p); | 1497 | return wait_task_zombie(wo, p); |
1498 | 1498 | ||
1499 | /* | 1499 | /* |
1500 | * Allow access to stopped/continued state via zombie by | 1500 | * Allow access to stopped/continued state via zombie by |
1501 | * falling through. Clearing of notask_error is complex. | 1501 | * falling through. Clearing of notask_error is complex. |
1502 | * | 1502 | * |
1503 | * When !@ptrace: | 1503 | * When !@ptrace: |
1504 | * | 1504 | * |
1505 | * If WEXITED is set, notask_error should naturally be | 1505 | * If WEXITED is set, notask_error should naturally be |
1506 | * cleared. If not, a subset of WSTOPPED|WCONTINUED is set, | 1506 | * cleared. If not, a subset of WSTOPPED|WCONTINUED is set, |
1507 | * so, if there are live subthreads, there are events to | 1507 | * so, if there are live subthreads, there are events to |
1508 | * wait for. If all subthreads are dead, it's still safe | 1508 | * wait for. If all subthreads are dead, it's still safe |
1509 | * to clear - this function will be called again in a finite | 1509 | * to clear - this function will be called again in a finite |
1510 | * amount of time once all the subthreads are released and | 1510 | * amount of time once all the subthreads are released and |
1511 | * will then return without clearing. | 1511 | * will then return without clearing. |
1512 | * | 1512 | * |
1513 | * When @ptrace: | 1513 | * When @ptrace: |
1514 | * | 1514 | * |
1515 | * Stopped state is per-task and thus can't change once the | 1515 | * Stopped state is per-task and thus can't change once the |
1516 | * target task dies. Only continued and exited can happen. | 1516 | * target task dies. Only continued and exited can happen. |
1517 | * Clear notask_error if WCONTINUED | WEXITED. | 1517 | * Clear notask_error if WCONTINUED | WEXITED. |
1518 | */ | 1518 | */ |
1519 | if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) | 1519 | if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) |
1520 | wo->notask_error = 0; | 1520 | wo->notask_error = 0; |
1521 | } else { | 1521 | } else { |
1522 | /* | 1522 | /* |
1523 | * If @p is ptraced by a task in its real parent's group, | 1523 | * If @p is ptraced by a task in its real parent's group, |
1524 | * hide group stop/continued state when looking at @p as | 1524 | * hide group stop/continued state when looking at @p as |
1525 | * the real parent; otherwise, a single stop can be | 1525 | * the real parent; otherwise, a single stop can be |
1526 | * reported twice as group and ptrace stops. | 1526 | * reported twice as group and ptrace stops. |
1527 | * | 1527 | * |
1528 | * If a ptracer wants to distinguish the two events for its | 1528 | * If a ptracer wants to distinguish the two events for its |
1529 | * own children, it should create a separate process which | 1529 | * own children, it should create a separate process which |
1530 | * takes the role of real parent. | 1530 | * takes the role of real parent. |
1531 | */ | 1531 | */ |
1532 | if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p)) | 1532 | if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p)) |
1533 | return 0; | 1533 | return 0; |
1534 | 1534 | ||
1535 | /* | 1535 | /* |
1536 | * @p is alive and it's gonna stop, continue or exit, so | 1536 | * @p is alive and it's gonna stop, continue or exit, so |
1537 | * there always is something to wait for. | 1537 | * there always is something to wait for. |
1538 | */ | 1538 | */ |
1539 | wo->notask_error = 0; | 1539 | wo->notask_error = 0; |
1540 | } | 1540 | } |
1541 | 1541 | ||
1542 | /* | 1542 | /* |
1543 | * Wait for stopped. Depending on @ptrace, different stopped state | 1543 | * Wait for stopped. Depending on @ptrace, different stopped state |
1544 | * is used and the two don't interact with each other. | 1544 | * is used and the two don't interact with each other. |
1545 | */ | 1545 | */ |
1546 | ret = wait_task_stopped(wo, ptrace, p); | 1546 | ret = wait_task_stopped(wo, ptrace, p); |
1547 | if (ret) | 1547 | if (ret) |
1548 | return ret; | 1548 | return ret; |
1549 | 1549 | ||
1550 | /* | 1550 | /* |
1551 | * Wait for continued. There's only one continued state and the | 1551 | * Wait for continued. There's only one continued state and the |
1552 | * ptracer can consume it which can confuse the real parent. Don't | 1552 | * ptracer can consume it which can confuse the real parent. Don't |
1553 | * use WCONTINUED from ptracer. You don't need or want it. | 1553 | * use WCONTINUED from ptracer. You don't need or want it. |
1554 | */ | 1554 | */ |
1555 | return wait_task_continued(wo, p); | 1555 | return wait_task_continued(wo, p); |
1556 | } | 1556 | } |
1557 | 1557 | ||
1558 | /* | 1558 | /* |
1559 | * Do the work of do_wait() for one thread in the group, @tsk. | 1559 | * Do the work of do_wait() for one thread in the group, @tsk. |
1560 | * | 1560 | * |
1561 | * -ECHILD should be in ->notask_error before the first call. | 1561 | * -ECHILD should be in ->notask_error before the first call. |
1562 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. | 1562 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. |
1563 | * Returns zero if the search for a child should continue; then | 1563 | * Returns zero if the search for a child should continue; then |
1564 | * ->notask_error is 0 if there were any eligible children, | 1564 | * ->notask_error is 0 if there were any eligible children, |
1565 | * or another error from security_task_wait(), or still -ECHILD. | 1565 | * or another error from security_task_wait(), or still -ECHILD. |
1566 | */ | 1566 | */ |
1567 | static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) | 1567 | static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) |
1568 | { | 1568 | { |
1569 | struct task_struct *p; | 1569 | struct task_struct *p; |
1570 | 1570 | ||
1571 | list_for_each_entry(p, &tsk->children, sibling) { | 1571 | list_for_each_entry(p, &tsk->children, sibling) { |
1572 | int ret = wait_consider_task(wo, 0, p); | 1572 | int ret = wait_consider_task(wo, 0, p); |
1573 | if (ret) | 1573 | if (ret) |
1574 | return ret; | 1574 | return ret; |
1575 | } | 1575 | } |
1576 | 1576 | ||
1577 | return 0; | 1577 | return 0; |
1578 | } | 1578 | } |
1579 | 1579 | ||
1580 | static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) | 1580 | static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) |
1581 | { | 1581 | { |
1582 | struct task_struct *p; | 1582 | struct task_struct *p; |
1583 | 1583 | ||
1584 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { | 1584 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { |
1585 | int ret = wait_consider_task(wo, 1, p); | 1585 | int ret = wait_consider_task(wo, 1, p); |
1586 | if (ret) | 1586 | if (ret) |
1587 | return ret; | 1587 | return ret; |
1588 | } | 1588 | } |
1589 | 1589 | ||
1590 | return 0; | 1590 | return 0; |
1591 | } | 1591 | } |
1592 | 1592 | ||
1593 | static int child_wait_callback(wait_queue_t *wait, unsigned mode, | 1593 | static int child_wait_callback(wait_queue_t *wait, unsigned mode, |
1594 | int sync, void *key) | 1594 | int sync, void *key) |
1595 | { | 1595 | { |
1596 | struct wait_opts *wo = container_of(wait, struct wait_opts, | 1596 | struct wait_opts *wo = container_of(wait, struct wait_opts, |
1597 | child_wait); | 1597 | child_wait); |
1598 | struct task_struct *p = key; | 1598 | struct task_struct *p = key; |
1599 | 1599 | ||
1600 | if (!eligible_pid(wo, p)) | 1600 | if (!eligible_pid(wo, p)) |
1601 | return 0; | 1601 | return 0; |
1602 | 1602 | ||
1603 | if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) | 1603 | if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) |
1604 | return 0; | 1604 | return 0; |
1605 | 1605 | ||
1606 | return default_wake_function(wait, mode, sync, key); | 1606 | return default_wake_function(wait, mode, sync, key); |
1607 | } | 1607 | } |
1608 | 1608 | ||
1609 | void __wake_up_parent(struct task_struct *p, struct task_struct *parent) | 1609 | void __wake_up_parent(struct task_struct *p, struct task_struct *parent) |
1610 | { | 1610 | { |
1611 | __wake_up_sync_key(&parent->signal->wait_chldexit, | 1611 | __wake_up_sync_key(&parent->signal->wait_chldexit, |
1612 | TASK_INTERRUPTIBLE, 1, p); | 1612 | TASK_INTERRUPTIBLE, 1, p); |
1613 | } | 1613 | } |
1614 | 1614 | ||
1615 | static long do_wait(struct wait_opts *wo) | 1615 | static long do_wait(struct wait_opts *wo) |
1616 | { | 1616 | { |
1617 | struct task_struct *tsk; | 1617 | struct task_struct *tsk; |
1618 | int retval; | 1618 | int retval; |
1619 | 1619 | ||
1620 | trace_sched_process_wait(wo->wo_pid); | 1620 | trace_sched_process_wait(wo->wo_pid); |
1621 | 1621 | ||
1622 | init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); | 1622 | init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); |
1623 | wo->child_wait.private = current; | 1623 | wo->child_wait.private = current; |
1624 | add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); | 1624 | add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); |
1625 | repeat: | 1625 | repeat: |
1626 | /* | 1626 | /* |
1627 | * If there is nothing that can match our criteria just get out. | 1627 | * If there is nothing that can match our criteria just get out. |
1628 | * We will clear ->notask_error to zero if we see any child that | 1628 | * We will clear ->notask_error to zero if we see any child that |
1629 | * might later match our criteria, even if we are not able to reap | 1629 | * might later match our criteria, even if we are not able to reap |
1630 | * it yet. | 1630 | * it yet. |
1631 | */ | 1631 | */ |
1632 | wo->notask_error = -ECHILD; | 1632 | wo->notask_error = -ECHILD; |
1633 | if ((wo->wo_type < PIDTYPE_MAX) && | 1633 | if ((wo->wo_type < PIDTYPE_MAX) && |
1634 | (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) | 1634 | (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) |
1635 | goto notask; | 1635 | goto notask; |
1636 | 1636 | ||
1637 | set_current_state(TASK_INTERRUPTIBLE); | 1637 | set_current_state(TASK_INTERRUPTIBLE); |
1638 | read_lock(&tasklist_lock); | 1638 | read_lock(&tasklist_lock); |
1639 | tsk = current; | 1639 | tsk = current; |
1640 | do { | 1640 | do { |
1641 | retval = do_wait_thread(wo, tsk); | 1641 | retval = do_wait_thread(wo, tsk); |
1642 | if (retval) | 1642 | if (retval) |
1643 | goto end; | 1643 | goto end; |
1644 | 1644 | ||
1645 | retval = ptrace_do_wait(wo, tsk); | 1645 | retval = ptrace_do_wait(wo, tsk); |
1646 | if (retval) | 1646 | if (retval) |
1647 | goto end; | 1647 | goto end; |
1648 | 1648 | ||
1649 | if (wo->wo_flags & __WNOTHREAD) | 1649 | if (wo->wo_flags & __WNOTHREAD) |
1650 | break; | 1650 | break; |
1651 | } while_each_thread(current, tsk); | 1651 | } while_each_thread(current, tsk); |
1652 | read_unlock(&tasklist_lock); | 1652 | read_unlock(&tasklist_lock); |
1653 | 1653 | ||
1654 | notask: | 1654 | notask: |
1655 | retval = wo->notask_error; | 1655 | retval = wo->notask_error; |
1656 | if (!retval && !(wo->wo_flags & WNOHANG)) { | 1656 | if (!retval && !(wo->wo_flags & WNOHANG)) { |
1657 | retval = -ERESTARTSYS; | 1657 | retval = -ERESTARTSYS; |
1658 | if (!signal_pending(current)) { | 1658 | if (!signal_pending(current)) { |
1659 | schedule(); | 1659 | schedule(); |
1660 | goto repeat; | 1660 | goto repeat; |
1661 | } | 1661 | } |
1662 | } | 1662 | } |
1663 | end: | 1663 | end: |
1664 | __set_current_state(TASK_RUNNING); | 1664 | __set_current_state(TASK_RUNNING); |
1665 | remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); | 1665 | remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); |
1666 | return retval; | 1666 | return retval; |
1667 | } | 1667 | } |
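do_wait() follows the usual sleep/wake discipline: register on the wait queue first, mark the task TASK_INTERRUPTIBLE, re-check the wake condition, and only then schedule(), so a wake-up arriving between the check and the sleep is never lost. A hedged, generic sketch of that idiom with hypothetical names (demo_wq, demo_condition, demo_wait); it is an illustration of the pattern, not code from this file:

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);	/* hypothetical wait queue */
static int demo_condition;			/* hypothetical wake condition */

static int demo_wait(void)
{
	DECLARE_WAITQUEUE(wait, current);
	int ret = 0;

	add_wait_queue(&demo_wq, &wait);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (demo_condition)		/* condition re-checked after queuing */
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&demo_wq, &wait);
	return ret;
}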
1668 | 1668 | ||
1669 | SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | 1669 | SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, |
1670 | infop, int, options, struct rusage __user *, ru) | 1670 | infop, int, options, struct rusage __user *, ru) |
1671 | { | 1671 | { |
1672 | struct wait_opts wo; | 1672 | struct wait_opts wo; |
1673 | struct pid *pid = NULL; | 1673 | struct pid *pid = NULL; |
1674 | enum pid_type type; | 1674 | enum pid_type type; |
1675 | long ret; | 1675 | long ret; |
1676 | 1676 | ||
1677 | if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) | 1677 | if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED)) |
1678 | return -EINVAL; | 1678 | return -EINVAL; |
1679 | if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) | 1679 | if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) |
1680 | return -EINVAL; | 1680 | return -EINVAL; |
1681 | 1681 | ||
1682 | switch (which) { | 1682 | switch (which) { |
1683 | case P_ALL: | 1683 | case P_ALL: |
1684 | type = PIDTYPE_MAX; | 1684 | type = PIDTYPE_MAX; |
1685 | break; | 1685 | break; |
1686 | case P_PID: | 1686 | case P_PID: |
1687 | type = PIDTYPE_PID; | 1687 | type = PIDTYPE_PID; |
1688 | if (upid <= 0) | 1688 | if (upid <= 0) |
1689 | return -EINVAL; | 1689 | return -EINVAL; |
1690 | break; | 1690 | break; |
1691 | case P_PGID: | 1691 | case P_PGID: |
1692 | type = PIDTYPE_PGID; | 1692 | type = PIDTYPE_PGID; |
1693 | if (upid <= 0) | 1693 | if (upid <= 0) |
1694 | return -EINVAL; | 1694 | return -EINVAL; |
1695 | break; | 1695 | break; |
1696 | default: | 1696 | default: |
1697 | return -EINVAL; | 1697 | return -EINVAL; |
1698 | } | 1698 | } |
1699 | 1699 | ||
1700 | if (type < PIDTYPE_MAX) | 1700 | if (type < PIDTYPE_MAX) |
1701 | pid = find_get_pid(upid); | 1701 | pid = find_get_pid(upid); |
1702 | 1702 | ||
1703 | wo.wo_type = type; | 1703 | wo.wo_type = type; |
1704 | wo.wo_pid = pid; | 1704 | wo.wo_pid = pid; |
1705 | wo.wo_flags = options; | 1705 | wo.wo_flags = options; |
1706 | wo.wo_info = infop; | 1706 | wo.wo_info = infop; |
1707 | wo.wo_stat = NULL; | 1707 | wo.wo_stat = NULL; |
1708 | wo.wo_rusage = ru; | 1708 | wo.wo_rusage = ru; |
1709 | ret = do_wait(&wo); | 1709 | ret = do_wait(&wo); |
1710 | 1710 | ||
1711 | if (ret > 0) { | 1711 | if (ret > 0) { |
1712 | ret = 0; | 1712 | ret = 0; |
1713 | } else if (infop) { | 1713 | } else if (infop) { |
1714 | /* | 1714 | /* |
1715 | * For a WNOHANG return, clear out all the fields | 1715 | * For a WNOHANG return, clear out all the fields |
1716 | * we would set so the user can easily tell the | 1716 | * we would set so the user can easily tell the |
1717 | * difference. | 1717 | * difference. |
1718 | */ | 1718 | */ |
1719 | if (!ret) | 1719 | if (!ret) |
1720 | ret = put_user(0, &infop->si_signo); | 1720 | ret = put_user(0, &infop->si_signo); |
1721 | if (!ret) | 1721 | if (!ret) |
1722 | ret = put_user(0, &infop->si_errno); | 1722 | ret = put_user(0, &infop->si_errno); |
1723 | if (!ret) | 1723 | if (!ret) |
1724 | ret = put_user(0, &infop->si_code); | 1724 | ret = put_user(0, &infop->si_code); |
1725 | if (!ret) | 1725 | if (!ret) |
1726 | ret = put_user(0, &infop->si_pid); | 1726 | ret = put_user(0, &infop->si_pid); |
1727 | if (!ret) | 1727 | if (!ret) |
1728 | ret = put_user(0, &infop->si_uid); | 1728 | ret = put_user(0, &infop->si_uid); |
1729 | if (!ret) | 1729 | if (!ret) |
1730 | ret = put_user(0, &infop->si_status); | 1730 | ret = put_user(0, &infop->si_status); |
1731 | } | 1731 | } |
1732 | 1732 | ||
1733 | put_pid(pid); | 1733 | put_pid(pid); |
1734 | 1734 | ||
1735 | /* avoid REGPARM breakage on x86: */ | 1735 | /* avoid REGPARM breakage on x86: */ |
1736 | asmlinkage_protect(5, ret, which, upid, infop, options, ru); | 1736 | asmlinkage_protect(5, ret, which, upid, infop, options, ru); |
1737 | return ret; | 1737 | return ret; |
1738 | } | 1738 | } |
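A hedged userspace sketch of calling the waitid() entry point defined above, showing the siginfo fields this syscall fills in on success; everything here is standard libc, not code from this file:

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0)
		_exit(7);

	siginfo_t info = { 0 };
	if (waitid(P_PID, child, &info, WEXITED) == 0)
		printf("si_pid=%d si_code=%d si_status=%d\n",	/* expect CLD_EXITED, 7 */
		       (int)info.si_pid, info.si_code, info.si_status);
	return 0;
}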
1739 | 1739 | ||
1740 | SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, | 1740 | SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, |
1741 | int, options, struct rusage __user *, ru) | 1741 | int, options, struct rusage __user *, ru) |
1742 | { | 1742 | { |
1743 | struct wait_opts wo; | 1743 | struct wait_opts wo; |
1744 | struct pid *pid = NULL; | 1744 | struct pid *pid = NULL; |
1745 | enum pid_type type; | 1745 | enum pid_type type; |
1746 | long ret; | 1746 | long ret; |
1747 | 1747 | ||
1748 | if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| | 1748 | if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| |
1749 | __WNOTHREAD|__WCLONE|__WALL)) | 1749 | __WNOTHREAD|__WCLONE|__WALL)) |
1750 | return -EINVAL; | 1750 | return -EINVAL; |
1751 | 1751 | ||
1752 | if (upid == -1) | 1752 | if (upid == -1) |
1753 | type = PIDTYPE_MAX; | 1753 | type = PIDTYPE_MAX; |
1754 | else if (upid < 0) { | 1754 | else if (upid < 0) { |
1755 | type = PIDTYPE_PGID; | 1755 | type = PIDTYPE_PGID; |
1756 | pid = find_get_pid(-upid); | 1756 | pid = find_get_pid(-upid); |
1757 | } else if (upid == 0) { | 1757 | } else if (upid == 0) { |
1758 | type = PIDTYPE_PGID; | 1758 | type = PIDTYPE_PGID; |
1759 | pid = get_task_pid(current, PIDTYPE_PGID); | 1759 | pid = get_task_pid(current, PIDTYPE_PGID); |
1760 | } else /* upid > 0 */ { | 1760 | } else /* upid > 0 */ { |
1761 | type = PIDTYPE_PID; | 1761 | type = PIDTYPE_PID; |
1762 | pid = find_get_pid(upid); | 1762 | pid = find_get_pid(upid); |
1763 | } | 1763 | } |
1764 | 1764 | ||
1765 | wo.wo_type = type; | 1765 | wo.wo_type = type; |
1766 | wo.wo_pid = pid; | 1766 | wo.wo_pid = pid; |
1767 | wo.wo_flags = options | WEXITED; | 1767 | wo.wo_flags = options | WEXITED; |
1768 | wo.wo_info = NULL; | 1768 | wo.wo_info = NULL; |
1769 | wo.wo_stat = stat_addr; | 1769 | wo.wo_stat = stat_addr; |
1770 | wo.wo_rusage = ru; | 1770 | wo.wo_rusage = ru; |
1771 | ret = do_wait(&wo); | 1771 | ret = do_wait(&wo); |
1772 | put_pid(pid); | 1772 | put_pid(pid); |
1773 | 1773 | ||
1774 | /* avoid REGPARM breakage on x86: */ | 1774 | /* avoid REGPARM breakage on x86: */ |
1775 | asmlinkage_protect(4, ret, upid, stat_addr, options, ru); | 1775 | asmlinkage_protect(4, ret, upid, stat_addr, options, ru); |
1776 | return ret; | 1776 | return ret; |
1777 | } | 1777 | } |
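The upid decoding above mirrors what waitpid(2) documents: -1 selects any child, 0 selects the caller's process group, another negative value selects process group -upid, and a positive value selects exactly that pid. A small, hedged userspace illustration of the first two cases:

#include <sys/types.h>
#include <sys/wait.h>

pid_t poll_children(void)
{
	int status;

	pid_t any  = waitpid(-1, &status, WNOHANG);	/* upid == -1: any child            */
	pid_t pgrp = waitpid(0, &status, WNOHANG);	/* upid ==  0: caller's process group */
	(void)pgrp;
	return any;
}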
1778 | 1778 | ||
1779 | #ifdef __ARCH_WANT_SYS_WAITPID | 1779 | #ifdef __ARCH_WANT_SYS_WAITPID |
1780 | 1780 | ||
1781 | /* | 1781 | /* |
1782 | * sys_waitpid() remains for compatibility. waitpid() should be | 1782 | * sys_waitpid() remains for compatibility. waitpid() should be |
1783 | * implemented by calling sys_wait4() from libc.a. | 1783 | * implemented by calling sys_wait4() from libc.a. |
1784 | */ | 1784 | */ |
1785 | SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) | 1785 | SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) |
1786 | { | 1786 | { |
1787 | return sys_wait4(pid, stat_addr, options, NULL); | 1787 | return sys_wait4(pid, stat_addr, options, NULL); |
1788 | } | 1788 | } |
1789 | 1789 | ||
1790 | #endif | 1790 | #endif |
1791 | 1791 |
kernel/sched/cputime.c
1 | #include <linux/export.h> | 1 | #include <linux/export.h> |
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/tsacct_kern.h> | 3 | #include <linux/tsacct_kern.h> |
4 | #include <linux/kernel_stat.h> | 4 | #include <linux/kernel_stat.h> |
5 | #include <linux/static_key.h> | 5 | #include <linux/static_key.h> |
6 | #include "sched.h" | 6 | #include "sched.h" |
7 | 7 | ||
8 | 8 | ||
9 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 9 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * There are no locks covering percpu hardirq/softirq time. | 12 | * There are no locks covering percpu hardirq/softirq time. |
13 | * They are only modified in vtime_account, on corresponding CPU | 13 | * They are only modified in vtime_account, on corresponding CPU |
14 | * with interrupts disabled. So, writes are safe. | 14 | * with interrupts disabled. So, writes are safe. |
15 | * They are read and saved off onto struct rq in update_rq_clock(). | 15 | * They are read and saved off onto struct rq in update_rq_clock(). |
16 | * This may result in another CPU reading this CPU's irq time and can | 16 | * This may result in another CPU reading this CPU's irq time and can |
17 | * race with irq/vtime_account on this CPU. We would either get the old | 17 | * race with irq/vtime_account on this CPU. We would either get the old |
18 | * or the new value with a side effect of accounting a slice of irq time to the | 18 | * or the new value with a side effect of accounting a slice of irq time to the |
19 | * wrong task when an irq is in progress while we read rq->clock. That is a worthy | 19 | * wrong task when an irq is in progress while we read rq->clock. That is a worthy |
20 | * compromise in place of having locks on each irq in account_system_time. | 20 | * compromise in place of having locks on each irq in account_system_time. |
21 | */ | 21 | */ |
22 | DEFINE_PER_CPU(u64, cpu_hardirq_time); | 22 | DEFINE_PER_CPU(u64, cpu_hardirq_time); |
23 | DEFINE_PER_CPU(u64, cpu_softirq_time); | 23 | DEFINE_PER_CPU(u64, cpu_softirq_time); |
24 | 24 | ||
25 | static DEFINE_PER_CPU(u64, irq_start_time); | 25 | static DEFINE_PER_CPU(u64, irq_start_time); |
26 | static int sched_clock_irqtime; | 26 | static int sched_clock_irqtime; |
27 | 27 | ||
28 | void enable_sched_clock_irqtime(void) | 28 | void enable_sched_clock_irqtime(void) |
29 | { | 29 | { |
30 | sched_clock_irqtime = 1; | 30 | sched_clock_irqtime = 1; |
31 | } | 31 | } |
32 | 32 | ||
33 | void disable_sched_clock_irqtime(void) | 33 | void disable_sched_clock_irqtime(void) |
34 | { | 34 | { |
35 | sched_clock_irqtime = 0; | 35 | sched_clock_irqtime = 0; |
36 | } | 36 | } |
37 | 37 | ||
38 | #ifndef CONFIG_64BIT | 38 | #ifndef CONFIG_64BIT |
39 | DEFINE_PER_CPU(seqcount_t, irq_time_seq); | 39 | DEFINE_PER_CPU(seqcount_t, irq_time_seq); |
40 | #endif /* CONFIG_64BIT */ | 40 | #endif /* CONFIG_64BIT */ |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Called before incrementing preempt_count on {soft,}irq_enter | 43 | * Called before incrementing preempt_count on {soft,}irq_enter |
44 | * and before decrementing preempt_count on {soft,}irq_exit. | 44 | * and before decrementing preempt_count on {soft,}irq_exit. |
45 | */ | 45 | */ |
46 | void irqtime_account_irq(struct task_struct *curr) | 46 | void irqtime_account_irq(struct task_struct *curr) |
47 | { | 47 | { |
48 | unsigned long flags; | 48 | unsigned long flags; |
49 | s64 delta; | 49 | s64 delta; |
50 | int cpu; | 50 | int cpu; |
51 | 51 | ||
52 | if (!sched_clock_irqtime) | 52 | if (!sched_clock_irqtime) |
53 | return; | 53 | return; |
54 | 54 | ||
55 | local_irq_save(flags); | 55 | local_irq_save(flags); |
56 | 56 | ||
57 | cpu = smp_processor_id(); | 57 | cpu = smp_processor_id(); |
58 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | 58 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); |
59 | __this_cpu_add(irq_start_time, delta); | 59 | __this_cpu_add(irq_start_time, delta); |
60 | 60 | ||
61 | irq_time_write_begin(); | 61 | irq_time_write_begin(); |
62 | /* | 62 | /* |
63 | * We do not account for softirq time from ksoftirqd here. | 63 | * We do not account for softirq time from ksoftirqd here. |
64 | * We want to continue accounting softirq time to ksoftirqd thread | 64 | * We want to continue accounting softirq time to ksoftirqd thread |
65 | * in that case, so as not to confuse scheduler with a special task | 65 | * in that case, so as not to confuse scheduler with a special task |
66 | * that do not consume any time, but still wants to run. | 66 | * that do not consume any time, but still wants to run. |
67 | */ | 67 | */ |
68 | if (hardirq_count()) | 68 | if (hardirq_count()) |
69 | __this_cpu_add(cpu_hardirq_time, delta); | 69 | __this_cpu_add(cpu_hardirq_time, delta); |
70 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | 70 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) |
71 | __this_cpu_add(cpu_softirq_time, delta); | 71 | __this_cpu_add(cpu_softirq_time, delta); |
72 | 72 | ||
73 | irq_time_write_end(); | 73 | irq_time_write_end(); |
74 | local_irq_restore(flags); | 74 | local_irq_restore(flags); |
75 | } | 75 | } |
76 | EXPORT_SYMBOL_GPL(irqtime_account_irq); | 76 | EXPORT_SYMBOL_GPL(irqtime_account_irq); |
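irq_time_write_begin()/irq_time_write_end() are defined outside this file; they presumably wrap the per-cpu irq_time_seq declared above so that 32-bit readers can detect a torn update of the two u64 counters and retry. A hedged sketch of what that pairing is assumed to look like; the *_sketch names are hypothetical and this is not the kernel's actual definition:

#ifndef CONFIG_64BIT
static inline void irq_time_write_begin_sketch(void)
{
	__this_cpu_inc(irq_time_seq.sequence);	/* odd count: writer in progress */
	smp_wmb();
}

static inline void irq_time_write_end_sketch(void)
{
	smp_wmb();
	__this_cpu_inc(irq_time_seq.sequence);	/* even count: update complete */
}
#endif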
77 | 77 | ||
78 | static int irqtime_account_hi_update(void) | 78 | static int irqtime_account_hi_update(void) |
79 | { | 79 | { |
80 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 80 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
81 | unsigned long flags; | 81 | unsigned long flags; |
82 | u64 latest_ns; | 82 | u64 latest_ns; |
83 | int ret = 0; | 83 | int ret = 0; |
84 | 84 | ||
85 | local_irq_save(flags); | 85 | local_irq_save(flags); |
86 | latest_ns = this_cpu_read(cpu_hardirq_time); | 86 | latest_ns = this_cpu_read(cpu_hardirq_time); |
87 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | 87 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) |
88 | ret = 1; | 88 | ret = 1; |
89 | local_irq_restore(flags); | 89 | local_irq_restore(flags); |
90 | return ret; | 90 | return ret; |
91 | } | 91 | } |
92 | 92 | ||
93 | static int irqtime_account_si_update(void) | 93 | static int irqtime_account_si_update(void) |
94 | { | 94 | { |
95 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 95 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
96 | unsigned long flags; | 96 | unsigned long flags; |
97 | u64 latest_ns; | 97 | u64 latest_ns; |
98 | int ret = 0; | 98 | int ret = 0; |
99 | 99 | ||
100 | local_irq_save(flags); | 100 | local_irq_save(flags); |
101 | latest_ns = this_cpu_read(cpu_softirq_time); | 101 | latest_ns = this_cpu_read(cpu_softirq_time); |
102 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | 102 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) |
103 | ret = 1; | 103 | ret = 1; |
104 | local_irq_restore(flags); | 104 | local_irq_restore(flags); |
105 | return ret; | 105 | return ret; |
106 | } | 106 | } |
107 | 107 | ||
108 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 108 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
109 | 109 | ||
110 | #define sched_clock_irqtime (0) | 110 | #define sched_clock_irqtime (0) |
111 | 111 | ||
112 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ | 112 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ |
113 | 113 | ||
114 | static inline void task_group_account_field(struct task_struct *p, int index, | 114 | static inline void task_group_account_field(struct task_struct *p, int index, |
115 | u64 tmp) | 115 | u64 tmp) |
116 | { | 116 | { |
117 | #ifdef CONFIG_CGROUP_CPUACCT | 117 | #ifdef CONFIG_CGROUP_CPUACCT |
118 | struct kernel_cpustat *kcpustat; | 118 | struct kernel_cpustat *kcpustat; |
119 | struct cpuacct *ca; | 119 | struct cpuacct *ca; |
120 | #endif | 120 | #endif |
121 | /* | 121 | /* |
122 | * Since all updates are sure to touch the root cgroup, we | 122 | * Since all updates are sure to touch the root cgroup, we |
123 | * get ourselves ahead and touch it first. If the root cgroup | 123 | * get ourselves ahead and touch it first. If the root cgroup |
124 | * is the only cgroup, then nothing else should be necessary. | 124 | * is the only cgroup, then nothing else should be necessary. |
125 | * | 125 | * |
126 | */ | 126 | */ |
127 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; | 127 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; |
128 | 128 | ||
129 | #ifdef CONFIG_CGROUP_CPUACCT | 129 | #ifdef CONFIG_CGROUP_CPUACCT |
130 | if (unlikely(!cpuacct_subsys.active)) | 130 | if (unlikely(!cpuacct_subsys.active)) |
131 | return; | 131 | return; |
132 | 132 | ||
133 | rcu_read_lock(); | 133 | rcu_read_lock(); |
134 | ca = task_ca(p); | 134 | ca = task_ca(p); |
135 | while (ca && (ca != &root_cpuacct)) { | 135 | while (ca && (ca != &root_cpuacct)) { |
136 | kcpustat = this_cpu_ptr(ca->cpustat); | 136 | kcpustat = this_cpu_ptr(ca->cpustat); |
137 | kcpustat->cpustat[index] += tmp; | 137 | kcpustat->cpustat[index] += tmp; |
138 | ca = parent_ca(ca); | 138 | ca = parent_ca(ca); |
139 | } | 139 | } |
140 | rcu_read_unlock(); | 140 | rcu_read_unlock(); |
141 | #endif | 141 | #endif |
142 | } | 142 | } |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * Account user cpu time to a process. | 145 | * Account user cpu time to a process. |
146 | * @p: the process that the cpu time gets accounted to | 146 | * @p: the process that the cpu time gets accounted to |
147 | * @cputime: the cpu time spent in user space since the last update | 147 | * @cputime: the cpu time spent in user space since the last update |
148 | * @cputime_scaled: cputime scaled by cpu frequency | 148 | * @cputime_scaled: cputime scaled by cpu frequency |
149 | */ | 149 | */ |
150 | void account_user_time(struct task_struct *p, cputime_t cputime, | 150 | void account_user_time(struct task_struct *p, cputime_t cputime, |
151 | cputime_t cputime_scaled) | 151 | cputime_t cputime_scaled) |
152 | { | 152 | { |
153 | int index; | 153 | int index; |
154 | 154 | ||
155 | /* Add user time to process. */ | 155 | /* Add user time to process. */ |
156 | p->utime += cputime; | 156 | p->utime += cputime; |
157 | p->utimescaled += cputime_scaled; | 157 | p->utimescaled += cputime_scaled; |
158 | account_group_user_time(p, cputime); | 158 | account_group_user_time(p, cputime); |
159 | 159 | ||
160 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | 160 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; |
161 | 161 | ||
162 | /* Add user time to cpustat. */ | 162 | /* Add user time to cpustat. */ |
163 | task_group_account_field(p, index, (__force u64) cputime); | 163 | task_group_account_field(p, index, (__force u64) cputime); |
164 | 164 | ||
165 | /* Account for user time used */ | 165 | /* Account for user time used */ |
166 | acct_update_integrals(p); | 166 | acct_update_integrals(p); |
167 | } | 167 | } |
168 | 168 | ||
169 | /* | 169 | /* |
170 | * Account guest cpu time to a process. | 170 | * Account guest cpu time to a process. |
171 | * @p: the process that the cpu time gets accounted to | 171 | * @p: the process that the cpu time gets accounted to |
172 | * @cputime: the cpu time spent in virtual machine since the last update | 172 | * @cputime: the cpu time spent in virtual machine since the last update |
173 | * @cputime_scaled: cputime scaled by cpu frequency | 173 | * @cputime_scaled: cputime scaled by cpu frequency |
174 | */ | 174 | */ |
175 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | 175 | static void account_guest_time(struct task_struct *p, cputime_t cputime, |
176 | cputime_t cputime_scaled) | 176 | cputime_t cputime_scaled) |
177 | { | 177 | { |
178 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 178 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
179 | 179 | ||
180 | /* Add guest time to process. */ | 180 | /* Add guest time to process. */ |
181 | p->utime += cputime; | 181 | p->utime += cputime; |
182 | p->utimescaled += cputime_scaled; | 182 | p->utimescaled += cputime_scaled; |
183 | account_group_user_time(p, cputime); | 183 | account_group_user_time(p, cputime); |
184 | p->gtime += cputime; | 184 | p->gtime += cputime; |
185 | 185 | ||
186 | /* Add guest time to cpustat. */ | 186 | /* Add guest time to cpustat. */ |
187 | if (TASK_NICE(p) > 0) { | 187 | if (TASK_NICE(p) > 0) { |
188 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | 188 | cpustat[CPUTIME_NICE] += (__force u64) cputime; |
189 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | 189 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; |
190 | } else { | 190 | } else { |
191 | cpustat[CPUTIME_USER] += (__force u64) cputime; | 191 | cpustat[CPUTIME_USER] += (__force u64) cputime; |
192 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | 192 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; |
193 | } | 193 | } |
194 | } | 194 | } |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * Account system cpu time to a process and desired cpustat field | 197 | * Account system cpu time to a process and desired cpustat field |
198 | * @p: the process that the cpu time gets accounted to | 198 | * @p: the process that the cpu time gets accounted to |
199 | * @cputime: the cpu time spent in kernel space since the last update | 199 | * @cputime: the cpu time spent in kernel space since the last update |
200 | * @cputime_scaled: cputime scaled by cpu frequency | 200 | * @cputime_scaled: cputime scaled by cpu frequency |
201 | * @target_cputime64: pointer to cpustat field that has to be updated | 201 | * @target_cputime64: pointer to cpustat field that has to be updated |
202 | */ | 202 | */ |
203 | static inline | 203 | static inline |
204 | void __account_system_time(struct task_struct *p, cputime_t cputime, | 204 | void __account_system_time(struct task_struct *p, cputime_t cputime, |
205 | cputime_t cputime_scaled, int index) | 205 | cputime_t cputime_scaled, int index) |
206 | { | 206 | { |
207 | /* Add system time to process. */ | 207 | /* Add system time to process. */ |
208 | p->stime += cputime; | 208 | p->stime += cputime; |
209 | p->stimescaled += cputime_scaled; | 209 | p->stimescaled += cputime_scaled; |
210 | account_group_system_time(p, cputime); | 210 | account_group_system_time(p, cputime); |
211 | 211 | ||
212 | /* Add system time to cpustat. */ | 212 | /* Add system time to cpustat. */ |
213 | task_group_account_field(p, index, (__force u64) cputime); | 213 | task_group_account_field(p, index, (__force u64) cputime); |
214 | 214 | ||
215 | /* Account for system time used */ | 215 | /* Account for system time used */ |
216 | acct_update_integrals(p); | 216 | acct_update_integrals(p); |
217 | } | 217 | } |
218 | 218 | ||
219 | /* | 219 | /* |
220 | * Account system cpu time to a process. | 220 | * Account system cpu time to a process. |
221 | * @p: the process that the cpu time gets accounted to | 221 | * @p: the process that the cpu time gets accounted to |
222 | * @hardirq_offset: the offset to subtract from hardirq_count() | 222 | * @hardirq_offset: the offset to subtract from hardirq_count() |
223 | * @cputime: the cpu time spent in kernel space since the last update | 223 | * @cputime: the cpu time spent in kernel space since the last update |
224 | * @cputime_scaled: cputime scaled by cpu frequency | 224 | * @cputime_scaled: cputime scaled by cpu frequency |
225 | */ | 225 | */ |
226 | void account_system_time(struct task_struct *p, int hardirq_offset, | 226 | void account_system_time(struct task_struct *p, int hardirq_offset, |
227 | cputime_t cputime, cputime_t cputime_scaled) | 227 | cputime_t cputime, cputime_t cputime_scaled) |
228 | { | 228 | { |
229 | int index; | 229 | int index; |
230 | 230 | ||
231 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | 231 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { |
232 | account_guest_time(p, cputime, cputime_scaled); | 232 | account_guest_time(p, cputime, cputime_scaled); |
233 | return; | 233 | return; |
234 | } | 234 | } |
235 | 235 | ||
236 | if (hardirq_count() - hardirq_offset) | 236 | if (hardirq_count() - hardirq_offset) |
237 | index = CPUTIME_IRQ; | 237 | index = CPUTIME_IRQ; |
238 | else if (in_serving_softirq()) | 238 | else if (in_serving_softirq()) |
239 | index = CPUTIME_SOFTIRQ; | 239 | index = CPUTIME_SOFTIRQ; |
240 | else | 240 | else |
241 | index = CPUTIME_SYSTEM; | 241 | index = CPUTIME_SYSTEM; |
242 | 242 | ||
243 | __account_system_time(p, cputime, cputime_scaled, index); | 243 | __account_system_time(p, cputime, cputime_scaled, index); |
244 | } | 244 | } |
245 | 245 | ||
246 | /* | 246 | /* |
247 | * Account for involuntary wait time. | 247 | * Account for involuntary wait time. |
248 | * @cputime: the cpu time spent in involuntary wait | 248 | * @cputime: the cpu time spent in involuntary wait |
249 | */ | 249 | */ |
250 | void account_steal_time(cputime_t cputime) | 250 | void account_steal_time(cputime_t cputime) |
251 | { | 251 | { |
252 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 252 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
253 | 253 | ||
254 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | 254 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; |
255 | } | 255 | } |
256 | 256 | ||
257 | /* | 257 | /* |
258 | * Account for idle time. | 258 | * Account for idle time. |
259 | * @cputime: the cpu time spent in idle wait | 259 | * @cputime: the cpu time spent in idle wait |
260 | */ | 260 | */ |
261 | void account_idle_time(cputime_t cputime) | 261 | void account_idle_time(cputime_t cputime) |
262 | { | 262 | { |
263 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 263 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
264 | struct rq *rq = this_rq(); | 264 | struct rq *rq = this_rq(); |
265 | 265 | ||
266 | if (atomic_read(&rq->nr_iowait) > 0) | 266 | if (atomic_read(&rq->nr_iowait) > 0) |
267 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | 267 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; |
268 | else | 268 | else |
269 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | 269 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; |
270 | } | 270 | } |
271 | 271 | ||
272 | static __always_inline bool steal_account_process_tick(void) | 272 | static __always_inline bool steal_account_process_tick(void) |
273 | { | 273 | { |
274 | #ifdef CONFIG_PARAVIRT | 274 | #ifdef CONFIG_PARAVIRT |
275 | if (static_key_false(¶virt_steal_enabled)) { | 275 | if (static_key_false(¶virt_steal_enabled)) { |
276 | u64 steal, st = 0; | 276 | u64 steal, st = 0; |
277 | 277 | ||
278 | steal = paravirt_steal_clock(smp_processor_id()); | 278 | steal = paravirt_steal_clock(smp_processor_id()); |
279 | steal -= this_rq()->prev_steal_time; | 279 | steal -= this_rq()->prev_steal_time; |
280 | 280 | ||
281 | st = steal_ticks(steal); | 281 | st = steal_ticks(steal); |
282 | this_rq()->prev_steal_time += st * TICK_NSEC; | 282 | this_rq()->prev_steal_time += st * TICK_NSEC; |
283 | 283 | ||
284 | account_steal_time(st); | 284 | account_steal_time(st); |
285 | return st; | 285 | return st; |
286 | } | 286 | } |
287 | #endif | 287 | #endif |
288 | return false; | 288 | return false; |
289 | } | 289 | } |
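steal_ticks() is defined elsewhere; from the way it is used here it is assumed to convert the accumulated steal time in nanoseconds into whole ticks, with any sub-tick remainder left behind for the next pass (note prev_steal_time only advances by st * TICK_NSEC above). A hedged sketch under that assumption; the *_sketch name is hypothetical:

#include <linux/jiffies.h>
#include <linux/math64.h>

static inline u64 steal_ticks_sketch(u64 steal_ns)
{
	return div_u64(steal_ns, TICK_NSEC);	/* whole ticks only, illustration */
}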
290 | 290 | ||
291 | /* | 291 | /* |
292 | * Accumulate raw cputime values of dead tasks (sig->[us]time) and live | 292 | * Accumulate raw cputime values of dead tasks (sig->[us]time) and live |
293 | * tasks (sum on group iteration) belonging to @tsk's group. | 293 | * tasks (sum on group iteration) belonging to @tsk's group. |
294 | */ | 294 | */ |
295 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) | 295 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) |
296 | { | 296 | { |
297 | struct signal_struct *sig = tsk->signal; | 297 | struct signal_struct *sig = tsk->signal; |
298 | struct task_struct *t; | 298 | struct task_struct *t; |
299 | 299 | ||
300 | times->utime = sig->utime; | 300 | times->utime = sig->utime; |
301 | times->stime = sig->stime; | 301 | times->stime = sig->stime; |
302 | times->sum_exec_runtime = sig->sum_sched_runtime; | 302 | times->sum_exec_runtime = sig->sum_sched_runtime; |
303 | 303 | ||
304 | rcu_read_lock(); | 304 | rcu_read_lock(); |
305 | /* make sure we can trust tsk->thread_group list */ | 305 | /* make sure we can trust tsk->thread_group list */ |
306 | if (!likely(pid_alive(tsk))) | 306 | if (!likely(pid_alive(tsk))) |
307 | goto out; | 307 | goto out; |
308 | 308 | ||
309 | t = tsk; | 309 | t = tsk; |
310 | do { | 310 | do { |
311 | times->utime += t->utime; | 311 | times->utime += t->utime; |
312 | times->stime += t->stime; | 312 | times->stime += t->stime; |
313 | times->sum_exec_runtime += task_sched_runtime(t); | 313 | times->sum_exec_runtime += task_sched_runtime(t); |
314 | } while_each_thread(tsk, t); | 314 | } while_each_thread(tsk, t); |
315 | out: | 315 | out: |
316 | rcu_read_unlock(); | 316 | rcu_read_unlock(); |
317 | } | 317 | } |
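This raw accumulator is what the commit's renamed thread_group_cputime_adjusted() builds on: per the commit message, the adjusted variant scales the raw utime/stime against the CFS sum_exec_runtime and bounds the result so values reported to userspace never go backwards. A hedged sketch of that relationship only; the scaling and monotonicity steps are left as comments because their implementation is not shown in this hunk, and the *_sketch name is hypothetical:

void thread_group_cputime_adjusted_sketch(struct task_struct *p,
					  cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);	/* raw sums, as above */

	/* 1) rescale cputime.utime/cputime.stime so they are consistent with
	 *    cputime.sum_exec_runtime (the CFS view of time consumed);
	 * 2) clamp against the previously reported values so successive
	 *    reads are monotonic. */
	*ut = cputime.utime;
	*st = cputime.stime;
}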
318 | 318 | ||
319 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 319 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
320 | 320 | ||
321 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 321 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
322 | /* | 322 | /* |
323 | * Account a tick to a process and cpustat | 323 | * Account a tick to a process and cpustat |
324 | * @p: the process that the cpu time gets accounted to | 324 | * @p: the process that the cpu time gets accounted to |
325 | * @user_tick: is the tick from userspace | 325 | * @user_tick: is the tick from userspace |
326 | * @rq: the pointer to rq | 326 | * @rq: the pointer to rq |
327 | * | 327 | * |
328 | * Tick demultiplexing follows the order | 328 | * Tick demultiplexing follows the order |
329 | * - pending hardirq update | 329 | * - pending hardirq update |
330 | * - pending softirq update | 330 | * - pending softirq update |
331 | * - user_time | 331 | * - user_time |
332 | * - idle_time | 332 | * - idle_time |
333 | * - system time | 333 | * - system time |
334 | * - check for guest_time | 334 | * - check for guest_time |
335 | * - else account as system_time | 335 | * - else account as system_time |
336 | * | 336 | * |
337 | * Check for hardirq is done both for system and user time as there is | 337 | * Check for hardirq is done both for system and user time as there is |
338 | * no timer going off while we are on hardirq and hence we may never get an | 338 | * no timer going off while we are on hardirq and hence we may never get an |
339 | * opportunity to update it solely in system time. | 339 | * opportunity to update it solely in system time. |
340 | * p->stime and friends are only updated on system time and not on irq or | 340 | * p->stime and friends are only updated on system time and not on irq or |
341 | * softirq, as those do not count in task exec_runtime any more. | 341 | * softirq, as those do not count in task exec_runtime any more. |
342 | */ | 342 | */ |
343 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 343 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
344 | struct rq *rq) | 344 | struct rq *rq) |
345 | { | 345 | { |
346 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 346 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
347 | u64 *cpustat = kcpustat_this_cpu->cpustat; | 347 | u64 *cpustat = kcpustat_this_cpu->cpustat; |
348 | 348 | ||
349 | if (steal_account_process_tick()) | 349 | if (steal_account_process_tick()) |
350 | return; | 350 | return; |
351 | 351 | ||
352 | if (irqtime_account_hi_update()) { | 352 | if (irqtime_account_hi_update()) { |
353 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | 353 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; |
354 | } else if (irqtime_account_si_update()) { | 354 | } else if (irqtime_account_si_update()) { |
355 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | 355 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; |
356 | } else if (this_cpu_ksoftirqd() == p) { | 356 | } else if (this_cpu_ksoftirqd() == p) { |
357 | /* | 357 | /* |
358 | * ksoftirqd time does not get accounted in cpu_softirq_time, | 358 | * ksoftirqd time does not get accounted in cpu_softirq_time, |
359 | * so we have to handle it separately here. | 359 | * so we have to handle it separately here. |
360 | * Also, p->stime needs to be updated for ksoftirqd. | 360 | * Also, p->stime needs to be updated for ksoftirqd. |
361 | */ | 361 | */ |
362 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | 362 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, |
363 | CPUTIME_SOFTIRQ); | 363 | CPUTIME_SOFTIRQ); |
364 | } else if (user_tick) { | 364 | } else if (user_tick) { |
365 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 365 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
366 | } else if (p == rq->idle) { | 366 | } else if (p == rq->idle) { |
367 | account_idle_time(cputime_one_jiffy); | 367 | account_idle_time(cputime_one_jiffy); |
368 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | 368 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ |
369 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | 369 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); |
370 | } else { | 370 | } else { |
371 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | 371 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, |
372 | CPUTIME_SYSTEM); | 372 | CPUTIME_SYSTEM); |
373 | } | 373 | } |
374 | } | 374 | } |
375 | 375 | ||
376 | static void irqtime_account_idle_ticks(int ticks) | 376 | static void irqtime_account_idle_ticks(int ticks) |
377 | { | 377 | { |
378 | int i; | 378 | int i; |
379 | struct rq *rq = this_rq(); | 379 | struct rq *rq = this_rq(); |
380 | 380 | ||
381 | for (i = 0; i < ticks; i++) | 381 | for (i = 0; i < ticks; i++) |
382 | irqtime_account_process_tick(current, 0, rq); | 382 | irqtime_account_process_tick(current, 0, rq); |
383 | } | 383 | } |
384 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 384 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
385 | static void irqtime_account_idle_ticks(int ticks) {} | 385 | static void irqtime_account_idle_ticks(int ticks) {} |
386 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 386 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
387 | struct rq *rq) {} | 387 | struct rq *rq) {} |
388 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | 388 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
389 | 389 | ||
390 | /* | 390 | /* |
391 | * Account a single tick of cpu time. | 391 | * Account a single tick of cpu time. |
392 | * @p: the process that the cpu time gets accounted to | 392 | * @p: the process that the cpu time gets accounted to |
393 | * @user_tick: indicates if the tick is a user or a system tick | 393 | * @user_tick: indicates if the tick is a user or a system tick |
394 | */ | 394 | */ |
395 | void account_process_tick(struct task_struct *p, int user_tick) | 395 | void account_process_tick(struct task_struct *p, int user_tick) |
396 | { | 396 | { |
397 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 397 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
398 | struct rq *rq = this_rq(); | 398 | struct rq *rq = this_rq(); |
399 | 399 | ||
400 | if (sched_clock_irqtime) { | 400 | if (sched_clock_irqtime) { |
401 | irqtime_account_process_tick(p, user_tick, rq); | 401 | irqtime_account_process_tick(p, user_tick, rq); |
402 | return; | 402 | return; |
403 | } | 403 | } |
404 | 404 | ||
405 | if (steal_account_process_tick()) | 405 | if (steal_account_process_tick()) |
406 | return; | 406 | return; |
407 | 407 | ||
408 | if (user_tick) | 408 | if (user_tick) |
409 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 409 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
410 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 410 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
411 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | 411 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, |
412 | one_jiffy_scaled); | 412 | one_jiffy_scaled); |
413 | else | 413 | else |
414 | account_idle_time(cputime_one_jiffy); | 414 | account_idle_time(cputime_one_jiffy); |
415 | } | 415 | } |
416 | 416 | ||
417 | /* | 417 | /* |
418 | * Account multiple ticks of steal time. | 418 | * Account multiple ticks of steal time. |
419 | * @p: the process from which the cpu time has been stolen | 419 | * @p: the process from which the cpu time has been stolen |
420 | * @ticks: number of stolen ticks | 420 | * @ticks: number of stolen ticks |
421 | */ | 421 | */ |
422 | void account_steal_ticks(unsigned long ticks) | 422 | void account_steal_ticks(unsigned long ticks) |
423 | { | 423 | { |
424 | account_steal_time(jiffies_to_cputime(ticks)); | 424 | account_steal_time(jiffies_to_cputime(ticks)); |
425 | } | 425 | } |
426 | 426 | ||
427 | /* | 427 | /* |
428 | * Account multiple ticks of idle time. | 428 | * Account multiple ticks of idle time. |
429 | * @ticks: number of ticks of idle time | 429 | * @ticks: number of ticks of idle time |
430 | */ | 430 | */ |
431 | void account_idle_ticks(unsigned long ticks) | 431 | void account_idle_ticks(unsigned long ticks) |
432 | { | 432 | { |
433 | 433 | ||
434 | if (sched_clock_irqtime) { | 434 | if (sched_clock_irqtime) { |
435 | irqtime_account_idle_ticks(ticks); | 435 | irqtime_account_idle_ticks(ticks); |
436 | return; | 436 | return; |
437 | } | 437 | } |
438 | 438 | ||
439 | account_idle_time(jiffies_to_cputime(ticks)); | 439 | account_idle_time(jiffies_to_cputime(ticks)); |
440 | } | 440 | } |
441 | 441 | ||
442 | #endif | 442 | #endif |
443 | 443 | ||
444 | /* | 444 | /* |
445 | * Use precise platform statistics if available: | 445 | * Use precise platform statistics if available: |
446 | */ | 446 | */ |
447 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 447 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
448 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | 448 | void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
449 | { | 449 | { |
450 | *ut = p->utime; | 450 | *ut = p->utime; |
451 | *st = p->stime; | 451 | *st = p->stime; |
452 | } | 452 | } |
453 | 453 | ||
454 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | 454 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
455 | { | 455 | { |
456 | struct task_cputime cputime; | 456 | struct task_cputime cputime; |
457 | 457 | ||
458 | thread_group_cputime(p, &cputime); | 458 | thread_group_cputime(p, &cputime); |
459 | 459 | ||
460 | *ut = cputime.utime; | 460 | *ut = cputime.utime; |
461 | *st = cputime.stime; | 461 | *st = cputime.stime; |
462 | } | 462 | } |
463 | 463 | ||
464 | void vtime_account_system(struct task_struct *tsk) | 464 | void vtime_account_system(struct task_struct *tsk) |
465 | { | 465 | { |
466 | unsigned long flags; | 466 | unsigned long flags; |
467 | 467 | ||
468 | local_irq_save(flags); | 468 | local_irq_save(flags); |
469 | __vtime_account_system(tsk); | 469 | __vtime_account_system(tsk); |
470 | local_irq_restore(flags); | 470 | local_irq_restore(flags); |
471 | } | 471 | } |
472 | EXPORT_SYMBOL_GPL(vtime_account_system); | 472 | EXPORT_SYMBOL_GPL(vtime_account_system); |
473 | 473 | ||
474 | /* | 474 | /* |
475 | * Archs that account the whole time spent in the idle task | 475 | * Archs that account the whole time spent in the idle task |
476 | * (outside irq) as idle time can rely on this and just implement | 476 | * (outside irq) as idle time can rely on this and just implement |
477 | * __vtime_account_system() and __vtime_account_idle(). Archs that | 477 | * __vtime_account_system() and __vtime_account_idle(). Archs that |
478 | * attach a different meaning to idle time (s390 only includes the | 478 | * attach a different meaning to idle time (s390 only includes the |
479 | * time spent by the CPU when it's in low power mode) must override | 479 | * time spent by the CPU when it's in low power mode) must override |
480 | * vtime_account(). | 480 | * vtime_account(). |
481 | */ | 481 | */ |
482 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | 482 | #ifndef __ARCH_HAS_VTIME_ACCOUNT |
483 | void vtime_account(struct task_struct *tsk) | 483 | void vtime_account(struct task_struct *tsk) |
484 | { | 484 | { |
485 | unsigned long flags; | 485 | unsigned long flags; |
486 | 486 | ||
487 | local_irq_save(flags); | 487 | local_irq_save(flags); |
488 | 488 | ||
489 | if (in_interrupt() || !is_idle_task(tsk)) | 489 | if (in_interrupt() || !is_idle_task(tsk)) |
490 | __vtime_account_system(tsk); | 490 | __vtime_account_system(tsk); |
491 | else | 491 | else |
492 | __vtime_account_idle(tsk); | 492 | __vtime_account_idle(tsk); |
493 | 493 | ||
494 | local_irq_restore(flags); | 494 | local_irq_restore(flags); |
495 | } | 495 | } |
496 | EXPORT_SYMBOL_GPL(vtime_account); | 496 | EXPORT_SYMBOL_GPL(vtime_account); |
497 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | 497 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
498 | 498 | ||
499 | #else | 499 | #else |
500 | 500 | ||
501 | #ifndef nsecs_to_cputime | 501 | #ifndef nsecs_to_cputime |
502 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | 502 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) |
503 | #endif | 503 | #endif |
504 | 504 | ||
505 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | 505 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) |
506 | { | 506 | { |
507 | u64 temp = (__force u64) rtime; | 507 | u64 temp = (__force u64) rtime; |
508 | 508 | ||
509 | temp *= (__force u64) utime; | 509 | temp *= (__force u64) utime; |
510 | 510 | ||
511 | if (sizeof(cputime_t) == 4) | 511 | if (sizeof(cputime_t) == 4) |
512 | temp = div_u64(temp, (__force u32) total); | 512 | temp = div_u64(temp, (__force u32) total); |
513 | else | 513 | else |
514 | temp = div64_u64(temp, (__force u64) total); | 514 | temp = div64_u64(temp, (__force u64) total); |
515 | 515 | ||
516 | return (__force cputime_t) temp; | 516 | return (__force cputime_t) temp; |
517 | } | 517 | } |
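
scale_utime() above computes utime * rtime / total with the intermediate product widened to 64 bits, so multiplying two large cputime_t values cannot overflow; when cputime_t is 32-bit it also uses the cheaper 64/32 division (div_u64) rather than a full 64/64 one. A minimal standalone sketch of the same arithmetic, using plain stdint types and a hypothetical function name rather than cputime_t:

#include <stdint.h>

/* Standalone sketch of the scaling: utime scaled by rtime/total, with the
 * product widened to 64 bits before the multiply. The total == 0 fallback
 * that the kernel callers handle separately is folded in here. */
static uint32_t scale_utime_sketch(uint32_t utime, uint32_t rtime, uint32_t total)
{
	uint64_t temp = (uint64_t)rtime * utime;	/* widen before multiplying */

	return total ? (uint32_t)(temp / total) : rtime;
}
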
518 | 518 | ||
519 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | 519 | void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
520 | { | 520 | { |
521 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | 521 | cputime_t rtime, utime = p->utime, total = utime + p->stime; |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * Use CFS's precise accounting: | 524 | * Use CFS's precise accounting: |
525 | */ | 525 | */ |
526 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | 526 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); |
527 | 527 | ||
528 | if (total) | 528 | if (total) |
529 | utime = scale_utime(utime, rtime, total); | 529 | utime = scale_utime(utime, rtime, total); |
530 | else | 530 | else |
531 | utime = rtime; | 531 | utime = rtime; |
532 | 532 | ||
533 | /* | 533 | /* |
534 | * Compare with previous values, to keep monotonicity: | 534 | * Compare with previous values, to keep monotonicity: |
535 | */ | 535 | */ |
536 | p->prev_utime = max(p->prev_utime, utime); | 536 | p->prev_utime = max(p->prev_utime, utime); |
537 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); | 537 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); |
538 | 538 | ||
539 | *ut = p->prev_utime; | 539 | *ut = p->prev_utime; |
540 | *st = p->prev_stime; | 540 | *st = p->prev_stime; |
541 | } | 541 | } |
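
task_cputime_adjusted() above scales the tick-sampled utime onto the precise CFS runtime and then clamps against p->prev_utime/prev_stime with max(), so the values reported to userspace never go backwards even if the scaled split shifts between calls. A minimal sketch of that clamp with plain 64-bit fields on a hypothetical structure (not task_struct), with an explicit underflow guard added in this sketch for safety:

#include <stdint.h>

/* Hypothetical stand-in for the prev_utime/prev_stime pair on task_struct. */
struct prev_cputime_sketch {
	uint64_t prev_utime;
	uint64_t prev_stime;
};

/* Sketch of the monotonicity clamp: each call can only move the reported
 * utime/stime forward. */
static void cputime_adjust_sketch(struct prev_cputime_sketch *prev,
				  uint64_t utime, uint64_t rtime,
				  uint64_t *ut, uint64_t *st)
{
	uint64_t stime;

	if (utime > prev->prev_utime)
		prev->prev_utime = utime;

	stime = rtime > prev->prev_utime ? rtime - prev->prev_utime : 0;
	if (stime > prev->prev_stime)
		prev->prev_stime = stime;

	*ut = prev->prev_utime;
	*st = prev->prev_stime;
}
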
542 | 542 | ||
543 | /* | 543 | /* |
544 | * Must be called with siglock held. | 544 | * Must be called with siglock held. |
545 | */ | 545 | */ |
546 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | 546 | void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
547 | { | 547 | { |
548 | struct signal_struct *sig = p->signal; | 548 | struct signal_struct *sig = p->signal; |
549 | struct task_cputime cputime; | 549 | struct task_cputime cputime; |
550 | cputime_t rtime, utime, total; | 550 | cputime_t rtime, utime, total; |
551 | 551 | ||
552 | thread_group_cputime(p, &cputime); | 552 | thread_group_cputime(p, &cputime); |
553 | 553 | ||
554 | total = cputime.utime + cputime.stime; | 554 | total = cputime.utime + cputime.stime; |
555 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | 555 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); |
556 | 556 | ||
557 | if (total) | 557 | if (total) |
558 | utime = scale_utime(cputime.utime, rtime, total); | 558 | utime = scale_utime(cputime.utime, rtime, total); |
559 | else | 559 | else |
560 | utime = rtime; | 560 | utime = rtime; |
561 | 561 | ||
562 | sig->prev_utime = max(sig->prev_utime, utime); | 562 | sig->prev_utime = max(sig->prev_utime, utime); |
563 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); | 563 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); |
564 | 564 | ||
565 | *ut = sig->prev_utime; | 565 | *ut = sig->prev_utime; |
566 | *st = sig->prev_stime; | 566 | *st = sig->prev_stime; |
567 | } | 567 | } |
568 | #endif | 568 | #endif |
569 | 569 |
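
thread_group_cputime_adjusted() is meant to be called with the group's siglock held (the comment on the scaled variant says so explicitly). A hedged sketch of the typical caller pattern, along the lines of do_sys_times() in kernel/sys.c below; the field and helper names here are assumed from mainline kernels of this era, not verified against this exact tree:

/* Sketch of a caller: take siglock, read the adjusted group times plus the
 * totals inherited from reaped children, then drop the lock. */
static void do_sys_times_sketch(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);

	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}
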
kernel/sys.c
1 | /* | 1 | /* |
2 | * linux/kernel/sys.c | 2 | * linux/kernel/sys.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/export.h> | 7 | #include <linux/export.h> |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/utsname.h> | 9 | #include <linux/utsname.h> |
10 | #include <linux/mman.h> | 10 | #include <linux/mman.h> |
11 | #include <linux/reboot.h> | 11 | #include <linux/reboot.h> |
12 | #include <linux/prctl.h> | 12 | #include <linux/prctl.h> |
13 | #include <linux/highuid.h> | 13 | #include <linux/highuid.h> |
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/kmod.h> | 15 | #include <linux/kmod.h> |
16 | #include <linux/perf_event.h> | 16 | #include <linux/perf_event.h> |
17 | #include <linux/resource.h> | 17 | #include <linux/resource.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/kexec.h> | 19 | #include <linux/kexec.h> |
20 | #include <linux/workqueue.h> | 20 | #include <linux/workqueue.h> |
21 | #include <linux/capability.h> | 21 | #include <linux/capability.h> |
22 | #include <linux/device.h> | 22 | #include <linux/device.h> |
23 | #include <linux/key.h> | 23 | #include <linux/key.h> |
24 | #include <linux/times.h> | 24 | #include <linux/times.h> |
25 | #include <linux/posix-timers.h> | 25 | #include <linux/posix-timers.h> |
26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
27 | #include <linux/dcookies.h> | 27 | #include <linux/dcookies.h> |
28 | #include <linux/suspend.h> | 28 | #include <linux/suspend.h> |
29 | #include <linux/tty.h> | 29 | #include <linux/tty.h> |
30 | #include <linux/signal.h> | 30 | #include <linux/signal.h> |
31 | #include <linux/cn_proc.h> | 31 | #include <linux/cn_proc.h> |
32 | #include <linux/getcpu.h> | 32 | #include <linux/getcpu.h> |
33 | #include <linux/task_io_accounting_ops.h> | 33 | #include <linux/task_io_accounting_ops.h> |
34 | #include <linux/seccomp.h> | 34 | #include <linux/seccomp.h> |
35 | #include <linux/cpu.h> | 35 | #include <linux/cpu.h> |
36 | #include <linux/personality.h> | 36 | #include <linux/personality.h> |
37 | #include <linux/ptrace.h> | 37 | #include <linux/ptrace.h> |
38 | #include <linux/fs_struct.h> | 38 | #include <linux/fs_struct.h> |
39 | #include <linux/file.h> | 39 | #include <linux/file.h> |
40 | #include <linux/mount.h> | 40 | #include <linux/mount.h> |
41 | #include <linux/gfp.h> | 41 | #include <linux/gfp.h> |
42 | #include <linux/syscore_ops.h> | 42 | #include <linux/syscore_ops.h> |
43 | #include <linux/version.h> | 43 | #include <linux/version.h> |
44 | #include <linux/ctype.h> | 44 | #include <linux/ctype.h> |
45 | 45 | ||
46 | #include <linux/compat.h> | 46 | #include <linux/compat.h> |
47 | #include <linux/syscalls.h> | 47 | #include <linux/syscalls.h> |
48 | #include <linux/kprobes.h> | 48 | #include <linux/kprobes.h> |
49 | #include <linux/user_namespace.h> | 49 | #include <linux/user_namespace.h> |
50 | 50 | ||
51 | #include <linux/kmsg_dump.h> | 51 | #include <linux/kmsg_dump.h> |
52 | /* Move somewhere else to avoid recompiling? */ | 52 | /* Move somewhere else to avoid recompiling? */ |
53 | #include <generated/utsrelease.h> | 53 | #include <generated/utsrelease.h> |
54 | 54 | ||
55 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
56 | #include <asm/io.h> | 56 | #include <asm/io.h> |
57 | #include <asm/unistd.h> | 57 | #include <asm/unistd.h> |
58 | 58 | ||
59 | #ifndef SET_UNALIGN_CTL | 59 | #ifndef SET_UNALIGN_CTL |
60 | # define SET_UNALIGN_CTL(a,b) (-EINVAL) | 60 | # define SET_UNALIGN_CTL(a,b) (-EINVAL) |
61 | #endif | 61 | #endif |
62 | #ifndef GET_UNALIGN_CTL | 62 | #ifndef GET_UNALIGN_CTL |
63 | # define GET_UNALIGN_CTL(a,b) (-EINVAL) | 63 | # define GET_UNALIGN_CTL(a,b) (-EINVAL) |
64 | #endif | 64 | #endif |
65 | #ifndef SET_FPEMU_CTL | 65 | #ifndef SET_FPEMU_CTL |
66 | # define SET_FPEMU_CTL(a,b) (-EINVAL) | 66 | # define SET_FPEMU_CTL(a,b) (-EINVAL) |
67 | #endif | 67 | #endif |
68 | #ifndef GET_FPEMU_CTL | 68 | #ifndef GET_FPEMU_CTL |
69 | # define GET_FPEMU_CTL(a,b) (-EINVAL) | 69 | # define GET_FPEMU_CTL(a,b) (-EINVAL) |
70 | #endif | 70 | #endif |
71 | #ifndef SET_FPEXC_CTL | 71 | #ifndef SET_FPEXC_CTL |
72 | # define SET_FPEXC_CTL(a,b) (-EINVAL) | 72 | # define SET_FPEXC_CTL(a,b) (-EINVAL) |
73 | #endif | 73 | #endif |
74 | #ifndef GET_FPEXC_CTL | 74 | #ifndef GET_FPEXC_CTL |
75 | # define GET_FPEXC_CTL(a,b) (-EINVAL) | 75 | # define GET_FPEXC_CTL(a,b) (-EINVAL) |
76 | #endif | 76 | #endif |
77 | #ifndef GET_ENDIAN | 77 | #ifndef GET_ENDIAN |
78 | # define GET_ENDIAN(a,b) (-EINVAL) | 78 | # define GET_ENDIAN(a,b) (-EINVAL) |
79 | #endif | 79 | #endif |
80 | #ifndef SET_ENDIAN | 80 | #ifndef SET_ENDIAN |
81 | # define SET_ENDIAN(a,b) (-EINVAL) | 81 | # define SET_ENDIAN(a,b) (-EINVAL) |
82 | #endif | 82 | #endif |
83 | #ifndef GET_TSC_CTL | 83 | #ifndef GET_TSC_CTL |
84 | # define GET_TSC_CTL(a) (-EINVAL) | 84 | # define GET_TSC_CTL(a) (-EINVAL) |
85 | #endif | 85 | #endif |
86 | #ifndef SET_TSC_CTL | 86 | #ifndef SET_TSC_CTL |
87 | # define SET_TSC_CTL(a) (-EINVAL) | 87 | # define SET_TSC_CTL(a) (-EINVAL) |
88 | #endif | 88 | #endif |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * this is where the system-wide overflow UID and GID are defined, for | 91 | * this is where the system-wide overflow UID and GID are defined, for |
92 | * architectures that now have 32-bit UID/GID but didn't in the past | 92 | * architectures that now have 32-bit UID/GID but didn't in the past |
93 | */ | 93 | */ |
94 | 94 | ||
95 | int overflowuid = DEFAULT_OVERFLOWUID; | 95 | int overflowuid = DEFAULT_OVERFLOWUID; |
96 | int overflowgid = DEFAULT_OVERFLOWGID; | 96 | int overflowgid = DEFAULT_OVERFLOWGID; |
97 | 97 | ||
98 | EXPORT_SYMBOL(overflowuid); | 98 | EXPORT_SYMBOL(overflowuid); |
99 | EXPORT_SYMBOL(overflowgid); | 99 | EXPORT_SYMBOL(overflowgid); |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * the same as above, but for filesystems which can only store a 16-bit | 102 | * the same as above, but for filesystems which can only store a 16-bit |
103 | * UID and GID. as such, this is needed on all architectures | 103 | * UID and GID. as such, this is needed on all architectures |
104 | */ | 104 | */ |
105 | 105 | ||
106 | int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; | 106 | int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; |
107 | int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; | 107 | int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; |
108 | 108 | ||
109 | EXPORT_SYMBOL(fs_overflowuid); | 109 | EXPORT_SYMBOL(fs_overflowuid); |
110 | EXPORT_SYMBOL(fs_overflowgid); | 110 | EXPORT_SYMBOL(fs_overflowgid); |
111 | 111 | ||
112 | /* | 112 | /* |
113 | * this indicates whether you can reboot with ctrl-alt-del: the default is yes | 113 | * this indicates whether you can reboot with ctrl-alt-del: the default is yes |
114 | */ | 114 | */ |
115 | 115 | ||
116 | int C_A_D = 1; | 116 | int C_A_D = 1; |
117 | struct pid *cad_pid; | 117 | struct pid *cad_pid; |
118 | EXPORT_SYMBOL(cad_pid); | 118 | EXPORT_SYMBOL(cad_pid); |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * If set, this is used for preparing the system to power off. | 121 | * If set, this is used for preparing the system to power off. |
122 | */ | 122 | */ |
123 | 123 | ||
124 | void (*pm_power_off_prepare)(void); | 124 | void (*pm_power_off_prepare)(void); |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * Returns true if current's euid is the same as p's uid or euid, | 127 | * Returns true if current's euid is the same as p's uid or euid, |
128 | * or if current has CAP_SYS_NICE over p's user_ns. | 128 | * or if current has CAP_SYS_NICE over p's user_ns. |
129 | * | 129 | * |
130 | * Called with rcu_read_lock held, so the creds are safe to read. | 130 | * Called with rcu_read_lock held, so the creds are safe to read. |
131 | */ | 131 | */ |
132 | static bool set_one_prio_perm(struct task_struct *p) | 132 | static bool set_one_prio_perm(struct task_struct *p) |
133 | { | 133 | { |
134 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | 134 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); |
135 | 135 | ||
136 | if (uid_eq(pcred->uid, cred->euid) || | 136 | if (uid_eq(pcred->uid, cred->euid) || |
137 | uid_eq(pcred->euid, cred->euid)) | 137 | uid_eq(pcred->euid, cred->euid)) |
138 | return true; | 138 | return true; |
139 | if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) | 139 | if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) |
140 | return true; | 140 | return true; |
141 | return false; | 141 | return false; |
142 | } | 142 | } |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * set the priority of a task | 145 | * set the priority of a task |
146 | * - the caller must hold the RCU read lock | 146 | * - the caller must hold the RCU read lock |
147 | */ | 147 | */ |
148 | static int set_one_prio(struct task_struct *p, int niceval, int error) | 148 | static int set_one_prio(struct task_struct *p, int niceval, int error) |
149 | { | 149 | { |
150 | int no_nice; | 150 | int no_nice; |
151 | 151 | ||
152 | if (!set_one_prio_perm(p)) { | 152 | if (!set_one_prio_perm(p)) { |
153 | error = -EPERM; | 153 | error = -EPERM; |
154 | goto out; | 154 | goto out; |
155 | } | 155 | } |
156 | if (niceval < task_nice(p) && !can_nice(p, niceval)) { | 156 | if (niceval < task_nice(p) && !can_nice(p, niceval)) { |
157 | error = -EACCES; | 157 | error = -EACCES; |
158 | goto out; | 158 | goto out; |
159 | } | 159 | } |
160 | no_nice = security_task_setnice(p, niceval); | 160 | no_nice = security_task_setnice(p, niceval); |
161 | if (no_nice) { | 161 | if (no_nice) { |
162 | error = no_nice; | 162 | error = no_nice; |
163 | goto out; | 163 | goto out; |
164 | } | 164 | } |
165 | if (error == -ESRCH) | 165 | if (error == -ESRCH) |
166 | error = 0; | 166 | error = 0; |
167 | set_user_nice(p, niceval); | 167 | set_user_nice(p, niceval); |
168 | out: | 168 | out: |
169 | return error; | 169 | return error; |
170 | } | 170 | } |
171 | 171 | ||
172 | SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) | 172 | SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) |
173 | { | 173 | { |
174 | struct task_struct *g, *p; | 174 | struct task_struct *g, *p; |
175 | struct user_struct *user; | 175 | struct user_struct *user; |
176 | const struct cred *cred = current_cred(); | 176 | const struct cred *cred = current_cred(); |
177 | int error = -EINVAL; | 177 | int error = -EINVAL; |
178 | struct pid *pgrp; | 178 | struct pid *pgrp; |
179 | kuid_t uid; | 179 | kuid_t uid; |
180 | 180 | ||
181 | if (which > PRIO_USER || which < PRIO_PROCESS) | 181 | if (which > PRIO_USER || which < PRIO_PROCESS) |
182 | goto out; | 182 | goto out; |
183 | 183 | ||
184 | /* normalize: avoid signed division (rounding problems) */ | 184 | /* normalize: avoid signed division (rounding problems) */ |
185 | error = -ESRCH; | 185 | error = -ESRCH; |
186 | if (niceval < -20) | 186 | if (niceval < -20) |
187 | niceval = -20; | 187 | niceval = -20; |
188 | if (niceval > 19) | 188 | if (niceval > 19) |
189 | niceval = 19; | 189 | niceval = 19; |
190 | 190 | ||
191 | rcu_read_lock(); | 191 | rcu_read_lock(); |
192 | read_lock(&tasklist_lock); | 192 | read_lock(&tasklist_lock); |
193 | switch (which) { | 193 | switch (which) { |
194 | case PRIO_PROCESS: | 194 | case PRIO_PROCESS: |
195 | if (who) | 195 | if (who) |
196 | p = find_task_by_vpid(who); | 196 | p = find_task_by_vpid(who); |
197 | else | 197 | else |
198 | p = current; | 198 | p = current; |
199 | if (p) | 199 | if (p) |
200 | error = set_one_prio(p, niceval, error); | 200 | error = set_one_prio(p, niceval, error); |
201 | break; | 201 | break; |
202 | case PRIO_PGRP: | 202 | case PRIO_PGRP: |
203 | if (who) | 203 | if (who) |
204 | pgrp = find_vpid(who); | 204 | pgrp = find_vpid(who); |
205 | else | 205 | else |
206 | pgrp = task_pgrp(current); | 206 | pgrp = task_pgrp(current); |
207 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { | 207 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
208 | error = set_one_prio(p, niceval, error); | 208 | error = set_one_prio(p, niceval, error); |
209 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); | 209 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
210 | break; | 210 | break; |
211 | case PRIO_USER: | 211 | case PRIO_USER: |
212 | uid = make_kuid(cred->user_ns, who); | 212 | uid = make_kuid(cred->user_ns, who); |
213 | user = cred->user; | 213 | user = cred->user; |
214 | if (!who) | 214 | if (!who) |
215 | uid = cred->uid; | 215 | uid = cred->uid; |
216 | else if (!uid_eq(uid, cred->uid) && | 216 | else if (!uid_eq(uid, cred->uid) && |
217 | !(user = find_user(uid))) | 217 | !(user = find_user(uid))) |
218 | goto out_unlock; /* No processes for this user */ | 218 | goto out_unlock; /* No processes for this user */ |
219 | 219 | ||
220 | do_each_thread(g, p) { | 220 | do_each_thread(g, p) { |
221 | if (uid_eq(task_uid(p), uid)) | 221 | if (uid_eq(task_uid(p), uid)) |
222 | error = set_one_prio(p, niceval, error); | 222 | error = set_one_prio(p, niceval, error); |
223 | } while_each_thread(g, p); | 223 | } while_each_thread(g, p); |
224 | if (!uid_eq(uid, cred->uid)) | 224 | if (!uid_eq(uid, cred->uid)) |
225 | free_uid(user); /* For find_user() */ | 225 | free_uid(user); /* For find_user() */ |
226 | break; | 226 | break; |
227 | } | 227 | } |
228 | out_unlock: | 228 | out_unlock: |
229 | read_unlock(&tasklist_lock); | 229 | read_unlock(&tasklist_lock); |
230 | rcu_read_unlock(); | 230 | rcu_read_unlock(); |
231 | out: | 231 | out: |
232 | return error; | 232 | return error; |
233 | } | 233 | } |
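
The setpriority handler above clamps the requested nice value to -20..19, resolves who according to which (process, process group, or user), and applies it through set_one_prio(), which enforces the permission checks. A hypothetical userspace illustration (not kernel code):

#include <sys/resource.h>
#include <stdio.h>

int main(void)
{
	/* Renice the calling process to nice 10 (who == 0 means "current"). */
	if (setpriority(PRIO_PROCESS, 0, 10) == -1)
		perror("setpriority(PRIO_PROCESS)");

	/* Lowering the value again (raising priority) normally needs
	 * CAP_SYS_NICE or a suitable RLIMIT_NICE, per set_one_prio() above. */
	if (setpriority(PRIO_PROCESS, 0, 5) == -1)
		perror("setpriority(PRIO_PROCESS, lower nice)");
	return 0;
}
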
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Ugh. To avoid negative return values, "getpriority()" will | 236 | * Ugh. To avoid negative return values, "getpriority()" will |
237 | * not return the normal nice-value, but a negated value that | 237 | * not return the normal nice-value, but a negated value that |
238 | * has been offset by 20 (ie it returns 40..1 instead of -20..19) | 238 | * has been offset by 20 (ie it returns 40..1 instead of -20..19) |
239 | * to stay compatible. | 239 | * to stay compatible. |
240 | */ | 240 | */ |
241 | SYSCALL_DEFINE2(getpriority, int, which, int, who) | 241 | SYSCALL_DEFINE2(getpriority, int, which, int, who) |
242 | { | 242 | { |
243 | struct task_struct *g, *p; | 243 | struct task_struct *g, *p; |
244 | struct user_struct *user; | 244 | struct user_struct *user; |
245 | const struct cred *cred = current_cred(); | 245 | const struct cred *cred = current_cred(); |
246 | long niceval, retval = -ESRCH; | 246 | long niceval, retval = -ESRCH; |
247 | struct pid *pgrp; | 247 | struct pid *pgrp; |
248 | kuid_t uid; | 248 | kuid_t uid; |
249 | 249 | ||
250 | if (which > PRIO_USER || which < PRIO_PROCESS) | 250 | if (which > PRIO_USER || which < PRIO_PROCESS) |
251 | return -EINVAL; | 251 | return -EINVAL; |
252 | 252 | ||
253 | rcu_read_lock(); | 253 | rcu_read_lock(); |
254 | read_lock(&tasklist_lock); | 254 | read_lock(&tasklist_lock); |
255 | switch (which) { | 255 | switch (which) { |
256 | case PRIO_PROCESS: | 256 | case PRIO_PROCESS: |
257 | if (who) | 257 | if (who) |
258 | p = find_task_by_vpid(who); | 258 | p = find_task_by_vpid(who); |
259 | else | 259 | else |
260 | p = current; | 260 | p = current; |
261 | if (p) { | 261 | if (p) { |
262 | niceval = 20 - task_nice(p); | 262 | niceval = 20 - task_nice(p); |
263 | if (niceval > retval) | 263 | if (niceval > retval) |
264 | retval = niceval; | 264 | retval = niceval; |
265 | } | 265 | } |
266 | break; | 266 | break; |
267 | case PRIO_PGRP: | 267 | case PRIO_PGRP: |
268 | if (who) | 268 | if (who) |
269 | pgrp = find_vpid(who); | 269 | pgrp = find_vpid(who); |
270 | else | 270 | else |
271 | pgrp = task_pgrp(current); | 271 | pgrp = task_pgrp(current); |
272 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { | 272 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
273 | niceval = 20 - task_nice(p); | 273 | niceval = 20 - task_nice(p); |
274 | if (niceval > retval) | 274 | if (niceval > retval) |
275 | retval = niceval; | 275 | retval = niceval; |
276 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); | 276 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
277 | break; | 277 | break; |
278 | case PRIO_USER: | 278 | case PRIO_USER: |
279 | uid = make_kuid(cred->user_ns, who); | 279 | uid = make_kuid(cred->user_ns, who); |
280 | user = cred->user; | 280 | user = cred->user; |
281 | if (!who) | 281 | if (!who) |
282 | uid = cred->uid; | 282 | uid = cred->uid; |
283 | else if (!uid_eq(uid, cred->uid) && | 283 | else if (!uid_eq(uid, cred->uid) && |
284 | !(user = find_user(uid))) | 284 | !(user = find_user(uid))) |
285 | goto out_unlock; /* No processes for this user */ | 285 | goto out_unlock; /* No processes for this user */ |
286 | 286 | ||
287 | do_each_thread(g, p) { | 287 | do_each_thread(g, p) { |
288 | if (uid_eq(task_uid(p), uid)) { | 288 | if (uid_eq(task_uid(p), uid)) { |
289 | niceval = 20 - task_nice(p); | 289 | niceval = 20 - task_nice(p); |
290 | if (niceval > retval) | 290 | if (niceval > retval) |
291 | retval = niceval; | 291 | retval = niceval; |
292 | } | 292 | } |
293 | } while_each_thread(g, p); | 293 | } while_each_thread(g, p); |
294 | if (!uid_eq(uid, cred->uid)) | 294 | if (!uid_eq(uid, cred->uid)) |
295 | free_uid(user); /* for find_user() */ | 295 | free_uid(user); /* for find_user() */ |
296 | break; | 296 | break; |
297 | } | 297 | } |
298 | out_unlock: | 298 | out_unlock: |
299 | read_unlock(&tasklist_lock); | 299 | read_unlock(&tasklist_lock); |
300 | rcu_read_unlock(); | 300 | rcu_read_unlock(); |
301 | 301 | ||
302 | return retval; | 302 | return retval; |
303 | } | 303 | } |
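
As the comment above explains, the raw getpriority() syscall returns 20 - nice, so the result is always positive (40..1 for nice -20..19) and a -1 return can unambiguously signal an error. A hypothetical userspace illustration of undoing the offset (glibc's getpriority() wrapper normally does this for you):

#include <sys/syscall.h>
#include <sys/resource.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* Call the raw syscall to see the offset value; who == 0 means "current". */
	long raw = syscall(SYS_getpriority, PRIO_PROCESS, 0);

	if (raw == -1)
		perror("getpriority");
	else
		printf("raw=%ld nice=%ld\n", raw, 20 - raw);
	return 0;
}
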
304 | 304 | ||
305 | /** | 305 | /** |
306 | * emergency_restart - reboot the system | 306 | * emergency_restart - reboot the system |
307 | * | 307 | * |
308 | * Without shutting down any hardware or taking any locks | 308 | * Without shutting down any hardware or taking any locks |
309 | * reboot the system. This is called when we know we are in | 309 | * reboot the system. This is called when we know we are in |
310 | * trouble so this is our best effort to reboot. This is | 310 | * trouble so this is our best effort to reboot. This is |
311 | * safe to call in interrupt context. | 311 | * safe to call in interrupt context. |
312 | */ | 312 | */ |
313 | void emergency_restart(void) | 313 | void emergency_restart(void) |
314 | { | 314 | { |
315 | kmsg_dump(KMSG_DUMP_EMERG); | 315 | kmsg_dump(KMSG_DUMP_EMERG); |
316 | machine_emergency_restart(); | 316 | machine_emergency_restart(); |
317 | } | 317 | } |
318 | EXPORT_SYMBOL_GPL(emergency_restart); | 318 | EXPORT_SYMBOL_GPL(emergency_restart); |
319 | 319 | ||
320 | void kernel_restart_prepare(char *cmd) | 320 | void kernel_restart_prepare(char *cmd) |
321 | { | 321 | { |
322 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); | 322 | blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); |
323 | system_state = SYSTEM_RESTART; | 323 | system_state = SYSTEM_RESTART; |
324 | usermodehelper_disable(); | 324 | usermodehelper_disable(); |
325 | device_shutdown(); | 325 | device_shutdown(); |
326 | syscore_shutdown(); | 326 | syscore_shutdown(); |
327 | } | 327 | } |
328 | 328 | ||
329 | /** | 329 | /** |
330 | * register_reboot_notifier - Register function to be called at reboot time | 330 | * register_reboot_notifier - Register function to be called at reboot time |
331 | * @nb: Info about notifier function to be called | 331 | * @nb: Info about notifier function to be called |
332 | * | 332 | * |
333 | * Registers a function with the list of functions | 333 | * Registers a function with the list of functions |
334 | * to be called at reboot time. | 334 | * to be called at reboot time. |
335 | * | 335 | * |
336 | * Currently always returns zero, as blocking_notifier_chain_register() | 336 | * Currently always returns zero, as blocking_notifier_chain_register() |
337 | * always returns zero. | 337 | * always returns zero. |
338 | */ | 338 | */ |
339 | int register_reboot_notifier(struct notifier_block *nb) | 339 | int register_reboot_notifier(struct notifier_block *nb) |
340 | { | 340 | { |
341 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); | 341 | return blocking_notifier_chain_register(&reboot_notifier_list, nb); |
342 | } | 342 | } |
343 | EXPORT_SYMBOL(register_reboot_notifier); | 343 | EXPORT_SYMBOL(register_reboot_notifier); |
344 | 344 | ||
345 | /** | 345 | /** |
346 | * unregister_reboot_notifier - Unregister previously registered reboot notifier | 346 | * unregister_reboot_notifier - Unregister previously registered reboot notifier |
347 | * @nb: Hook to be unregistered | 347 | * @nb: Hook to be unregistered |
348 | * | 348 | * |
349 | * Unregisters a previously registered reboot | 349 | * Unregisters a previously registered reboot |
350 | * notifier function. | 350 | * notifier function. |
351 | * | 351 | * |
352 | * Returns zero on success, or %-ENOENT on failure. | 352 | * Returns zero on success, or %-ENOENT on failure. |
353 | */ | 353 | */ |
354 | int unregister_reboot_notifier(struct notifier_block *nb) | 354 | int unregister_reboot_notifier(struct notifier_block *nb) |
355 | { | 355 | { |
356 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); | 356 | return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); |
357 | } | 357 | } |
358 | EXPORT_SYMBOL(unregister_reboot_notifier); | 358 | EXPORT_SYMBOL(unregister_reboot_notifier); |
359 | 359 | ||
360 | /** | 360 | /** |
361 | * kernel_restart - reboot the system | 361 | * kernel_restart - reboot the system |
362 | * @cmd: pointer to buffer containing command to execute for restart | 362 | * @cmd: pointer to buffer containing command to execute for restart |
363 | * or %NULL | 363 | * or %NULL |
364 | * | 364 | * |
365 | * Shutdown everything and perform a clean reboot. | 365 | * Shutdown everything and perform a clean reboot. |
366 | * This is not safe to call in interrupt context. | 366 | * This is not safe to call in interrupt context. |
367 | */ | 367 | */ |
368 | void kernel_restart(char *cmd) | 368 | void kernel_restart(char *cmd) |
369 | { | 369 | { |
370 | kernel_restart_prepare(cmd); | 370 | kernel_restart_prepare(cmd); |
371 | disable_nonboot_cpus(); | 371 | disable_nonboot_cpus(); |
372 | if (!cmd) | 372 | if (!cmd) |
373 | printk(KERN_EMERG "Restarting system.\n"); | 373 | printk(KERN_EMERG "Restarting system.\n"); |
374 | else | 374 | else |
375 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); | 375 | printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); |
376 | kmsg_dump(KMSG_DUMP_RESTART); | 376 | kmsg_dump(KMSG_DUMP_RESTART); |
377 | machine_restart(cmd); | 377 | machine_restart(cmd); |
378 | } | 378 | } |
379 | EXPORT_SYMBOL_GPL(kernel_restart); | 379 | EXPORT_SYMBOL_GPL(kernel_restart); |
380 | 380 | ||
381 | static void kernel_shutdown_prepare(enum system_states state) | 381 | static void kernel_shutdown_prepare(enum system_states state) |
382 | { | 382 | { |
383 | blocking_notifier_call_chain(&reboot_notifier_list, | 383 | blocking_notifier_call_chain(&reboot_notifier_list, |
384 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); | 384 | (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); |
385 | system_state = state; | 385 | system_state = state; |
386 | usermodehelper_disable(); | 386 | usermodehelper_disable(); |
387 | device_shutdown(); | 387 | device_shutdown(); |
388 | } | 388 | } |
389 | /** | 389 | /** |
390 | * kernel_halt - halt the system | 390 | * kernel_halt - halt the system |
391 | * | 391 | * |
392 | * Shutdown everything and perform a clean system halt. | 392 | * Shutdown everything and perform a clean system halt. |
393 | */ | 393 | */ |
394 | void kernel_halt(void) | 394 | void kernel_halt(void) |
395 | { | 395 | { |
396 | kernel_shutdown_prepare(SYSTEM_HALT); | 396 | kernel_shutdown_prepare(SYSTEM_HALT); |
397 | syscore_shutdown(); | 397 | syscore_shutdown(); |
398 | printk(KERN_EMERG "System halted.\n"); | 398 | printk(KERN_EMERG "System halted.\n"); |
399 | kmsg_dump(KMSG_DUMP_HALT); | 399 | kmsg_dump(KMSG_DUMP_HALT); |
400 | machine_halt(); | 400 | machine_halt(); |
401 | } | 401 | } |
402 | 402 | ||
403 | EXPORT_SYMBOL_GPL(kernel_halt); | 403 | EXPORT_SYMBOL_GPL(kernel_halt); |
404 | 404 | ||
405 | /** | 405 | /** |
406 | * kernel_power_off - power_off the system | 406 | * kernel_power_off - power_off the system |
407 | * | 407 | * |
408 | * Shutdown everything and perform a clean system power_off. | 408 | * Shutdown everything and perform a clean system power_off. |
409 | */ | 409 | */ |
410 | void kernel_power_off(void) | 410 | void kernel_power_off(void) |
411 | { | 411 | { |
412 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | 412 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); |
413 | if (pm_power_off_prepare) | 413 | if (pm_power_off_prepare) |
414 | pm_power_off_prepare(); | 414 | pm_power_off_prepare(); |
415 | disable_nonboot_cpus(); | 415 | disable_nonboot_cpus(); |
416 | syscore_shutdown(); | 416 | syscore_shutdown(); |
417 | printk(KERN_EMERG "Power down.\n"); | 417 | printk(KERN_EMERG "Power down.\n"); |
418 | kmsg_dump(KMSG_DUMP_POWEROFF); | 418 | kmsg_dump(KMSG_DUMP_POWEROFF); |
419 | machine_power_off(); | 419 | machine_power_off(); |
420 | } | 420 | } |
421 | EXPORT_SYMBOL_GPL(kernel_power_off); | 421 | EXPORT_SYMBOL_GPL(kernel_power_off); |
422 | 422 | ||
423 | static DEFINE_MUTEX(reboot_mutex); | 423 | static DEFINE_MUTEX(reboot_mutex); |
424 | 424 | ||
425 | /* | 425 | /* |
426 | * Reboot system call: for obvious reasons only root may call it, | 426 | * Reboot system call: for obvious reasons only root may call it, |
427 | * and even root needs to set up some magic numbers in the registers | 427 | * and even root needs to set up some magic numbers in the registers |
428 | * so that some mistake won't make this reboot the whole machine. | 428 | * so that some mistake won't make this reboot the whole machine. |
429 | * You can also set the meaning of the ctrl-alt-del-key here. | 429 | * You can also set the meaning of the ctrl-alt-del-key here. |
430 | * | 430 | * |
431 | * reboot doesn't sync: do that yourself before calling this. | 431 | * reboot doesn't sync: do that yourself before calling this. |
432 | */ | 432 | */ |
433 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | 433 | SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, |
434 | void __user *, arg) | 434 | void __user *, arg) |
435 | { | 435 | { |
436 | char buffer[256]; | 436 | char buffer[256]; |
437 | int ret = 0; | 437 | int ret = 0; |
438 | 438 | ||
439 | /* We only trust the superuser with rebooting the system. */ | 439 | /* We only trust the superuser with rebooting the system. */ |
440 | if (!capable(CAP_SYS_BOOT)) | 440 | if (!capable(CAP_SYS_BOOT)) |
441 | return -EPERM; | 441 | return -EPERM; |
442 | 442 | ||
443 | /* For safety, we require "magic" arguments. */ | 443 | /* For safety, we require "magic" arguments. */ |
444 | if (magic1 != LINUX_REBOOT_MAGIC1 || | 444 | if (magic1 != LINUX_REBOOT_MAGIC1 || |
445 | (magic2 != LINUX_REBOOT_MAGIC2 && | 445 | (magic2 != LINUX_REBOOT_MAGIC2 && |
446 | magic2 != LINUX_REBOOT_MAGIC2A && | 446 | magic2 != LINUX_REBOOT_MAGIC2A && |
447 | magic2 != LINUX_REBOOT_MAGIC2B && | 447 | magic2 != LINUX_REBOOT_MAGIC2B && |
448 | magic2 != LINUX_REBOOT_MAGIC2C)) | 448 | magic2 != LINUX_REBOOT_MAGIC2C)) |
449 | return -EINVAL; | 449 | return -EINVAL; |
450 | 450 | ||
451 | /* | 451 | /* |
452 | * If pid namespaces are enabled and the current task is in a child | 452 | * If pid namespaces are enabled and the current task is in a child |
453 | * pid_namespace, the command is handled by reboot_pid_ns() which will | 453 | * pid_namespace, the command is handled by reboot_pid_ns() which will |
454 | * call do_exit(). | 454 | * call do_exit(). |
455 | */ | 455 | */ |
456 | ret = reboot_pid_ns(task_active_pid_ns(current), cmd); | 456 | ret = reboot_pid_ns(task_active_pid_ns(current), cmd); |
457 | if (ret) | 457 | if (ret) |
458 | return ret; | 458 | return ret; |
459 | 459 | ||
460 | /* Instead of trying to make the power_off code look like | 460 | /* Instead of trying to make the power_off code look like |
461 | * halt when pm_power_off is not set, do it the easy way. | 461 | * halt when pm_power_off is not set, do it the easy way. |
462 | */ | 462 | */ |
463 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) | 463 | if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) |
464 | cmd = LINUX_REBOOT_CMD_HALT; | 464 | cmd = LINUX_REBOOT_CMD_HALT; |
465 | 465 | ||
466 | mutex_lock(&reboot_mutex); | 466 | mutex_lock(&reboot_mutex); |
467 | switch (cmd) { | 467 | switch (cmd) { |
468 | case LINUX_REBOOT_CMD_RESTART: | 468 | case LINUX_REBOOT_CMD_RESTART: |
469 | kernel_restart(NULL); | 469 | kernel_restart(NULL); |
470 | break; | 470 | break; |
471 | 471 | ||
472 | case LINUX_REBOOT_CMD_CAD_ON: | 472 | case LINUX_REBOOT_CMD_CAD_ON: |
473 | C_A_D = 1; | 473 | C_A_D = 1; |
474 | break; | 474 | break; |
475 | 475 | ||
476 | case LINUX_REBOOT_CMD_CAD_OFF: | 476 | case LINUX_REBOOT_CMD_CAD_OFF: |
477 | C_A_D = 0; | 477 | C_A_D = 0; |
478 | break; | 478 | break; |
479 | 479 | ||
480 | case LINUX_REBOOT_CMD_HALT: | 480 | case LINUX_REBOOT_CMD_HALT: |
481 | kernel_halt(); | 481 | kernel_halt(); |
482 | do_exit(0); | 482 | do_exit(0); |
483 | panic("cannot halt"); | 483 | panic("cannot halt"); |
484 | 484 | ||
485 | case LINUX_REBOOT_CMD_POWER_OFF: | 485 | case LINUX_REBOOT_CMD_POWER_OFF: |
486 | kernel_power_off(); | 486 | kernel_power_off(); |
487 | do_exit(0); | 487 | do_exit(0); |
488 | break; | 488 | break; |
489 | 489 | ||
490 | case LINUX_REBOOT_CMD_RESTART2: | 490 | case LINUX_REBOOT_CMD_RESTART2: |
491 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { | 491 | if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { |
492 | ret = -EFAULT; | 492 | ret = -EFAULT; |
493 | break; | 493 | break; |
494 | } | 494 | } |
495 | buffer[sizeof(buffer) - 1] = '\0'; | 495 | buffer[sizeof(buffer) - 1] = '\0'; |
496 | 496 | ||
497 | kernel_restart(buffer); | 497 | kernel_restart(buffer); |
498 | break; | 498 | break; |
499 | 499 | ||
500 | #ifdef CONFIG_KEXEC | 500 | #ifdef CONFIG_KEXEC |
501 | case LINUX_REBOOT_CMD_KEXEC: | 501 | case LINUX_REBOOT_CMD_KEXEC: |
502 | ret = kernel_kexec(); | 502 | ret = kernel_kexec(); |
503 | break; | 503 | break; |
504 | #endif | 504 | #endif |
505 | 505 | ||
506 | #ifdef CONFIG_HIBERNATION | 506 | #ifdef CONFIG_HIBERNATION |
507 | case LINUX_REBOOT_CMD_SW_SUSPEND: | 507 | case LINUX_REBOOT_CMD_SW_SUSPEND: |
508 | ret = hibernate(); | 508 | ret = hibernate(); |
509 | break; | 509 | break; |
510 | #endif | 510 | #endif |
511 | 511 | ||
512 | default: | 512 | default: |
513 | ret = -EINVAL; | 513 | ret = -EINVAL; |
514 | break; | 514 | break; |
515 | } | 515 | } |
516 | mutex_unlock(&reboot_mutex); | 516 | mutex_unlock(&reboot_mutex); |
517 | return ret; | 517 | return ret; |
518 | } | 518 | } |
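
The comment above notes that the reboot syscall requires CAP_SYS_BOOT plus the magic constants, and that it does not sync. A hypothetical userspace illustration of a clean power-off request using the raw syscall (the glibc reboot() wrapper supplies the magic numbers itself):

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/reboot.h>
#include <stdio.h>

int main(void)
{
	sync();		/* the syscall deliberately does not sync for us */
	if (syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
		    LINUX_REBOOT_CMD_POWER_OFF, NULL) == -1)
		perror("reboot");	/* EPERM without CAP_SYS_BOOT, EINVAL on bad magic */
	return 0;
}
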
519 | 519 | ||
520 | static void deferred_cad(struct work_struct *dummy) | 520 | static void deferred_cad(struct work_struct *dummy) |
521 | { | 521 | { |
522 | kernel_restart(NULL); | 522 | kernel_restart(NULL); |
523 | } | 523 | } |
524 | 524 | ||
525 | /* | 525 | /* |
526 | * This function gets called by ctrl-alt-del - ie the keyboard interrupt. | 526 | * This function gets called by ctrl-alt-del - ie the keyboard interrupt. |
527 | * As it's called within an interrupt, it may NOT sync: the only choice | 527 | * As it's called within an interrupt, it may NOT sync: the only choice |
528 | * is whether to reboot at once, or just ignore the ctrl-alt-del. | 528 | * is whether to reboot at once, or just ignore the ctrl-alt-del. |
529 | */ | 529 | */ |
530 | void ctrl_alt_del(void) | 530 | void ctrl_alt_del(void) |
531 | { | 531 | { |
532 | static DECLARE_WORK(cad_work, deferred_cad); | 532 | static DECLARE_WORK(cad_work, deferred_cad); |
533 | 533 | ||
534 | if (C_A_D) | 534 | if (C_A_D) |
535 | schedule_work(&cad_work); | 535 | schedule_work(&cad_work); |
536 | else | 536 | else |
537 | kill_cad_pid(SIGINT, 1); | 537 | kill_cad_pid(SIGINT, 1); |
538 | } | 538 | } |
539 | 539 | ||
540 | /* | 540 | /* |
541 | * Unprivileged users may change the real gid to the effective gid | 541 | * Unprivileged users may change the real gid to the effective gid |
542 | * or vice versa. (BSD-style) | 542 | * or vice versa. (BSD-style) |
543 | * | 543 | * |
544 | * If you set the real gid at all, or set the effective gid to a value not | 544 | * If you set the real gid at all, or set the effective gid to a value not |
545 | * equal to the real gid, then the saved gid is set to the new effective gid. | 545 | * equal to the real gid, then the saved gid is set to the new effective gid. |
546 | * | 546 | * |
547 | * This makes it possible for a setgid program to completely drop its | 547 | * This makes it possible for a setgid program to completely drop its |
548 | * privileges, which is often a useful assertion to make when you are doing | 548 | * privileges, which is often a useful assertion to make when you are doing |
549 | * a security audit over a program. | 549 | * a security audit over a program. |
550 | * | 550 | * |
551 | * The general idea is that a program which uses just setregid() will be | 551 | * The general idea is that a program which uses just setregid() will be |
552 | * 100% compatible with BSD. A program which uses just setgid() will be | 552 | * 100% compatible with BSD. A program which uses just setgid() will be |
553 | * 100% compatible with POSIX with saved IDs. | 553 | * 100% compatible with POSIX with saved IDs. |
554 | * | 554 | * |
555 | * SMP: There are no races; the GIDs are checked only by filesystem | 555 | * SMP: There are no races; the GIDs are checked only by filesystem |
556 | * operations (as far as semantic preservation is concerned). | 556 | * operations (as far as semantic preservation is concerned). |
557 | */ | 557 | */ |
558 | SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | 558 | SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) |
559 | { | 559 | { |
560 | struct user_namespace *ns = current_user_ns(); | 560 | struct user_namespace *ns = current_user_ns(); |
561 | const struct cred *old; | 561 | const struct cred *old; |
562 | struct cred *new; | 562 | struct cred *new; |
563 | int retval; | 563 | int retval; |
564 | kgid_t krgid, kegid; | 564 | kgid_t krgid, kegid; |
565 | 565 | ||
566 | krgid = make_kgid(ns, rgid); | 566 | krgid = make_kgid(ns, rgid); |
567 | kegid = make_kgid(ns, egid); | 567 | kegid = make_kgid(ns, egid); |
568 | 568 | ||
569 | if ((rgid != (gid_t) -1) && !gid_valid(krgid)) | 569 | if ((rgid != (gid_t) -1) && !gid_valid(krgid)) |
570 | return -EINVAL; | 570 | return -EINVAL; |
571 | if ((egid != (gid_t) -1) && !gid_valid(kegid)) | 571 | if ((egid != (gid_t) -1) && !gid_valid(kegid)) |
572 | return -EINVAL; | 572 | return -EINVAL; |
573 | 573 | ||
574 | new = prepare_creds(); | 574 | new = prepare_creds(); |
575 | if (!new) | 575 | if (!new) |
576 | return -ENOMEM; | 576 | return -ENOMEM; |
577 | old = current_cred(); | 577 | old = current_cred(); |
578 | 578 | ||
579 | retval = -EPERM; | 579 | retval = -EPERM; |
580 | if (rgid != (gid_t) -1) { | 580 | if (rgid != (gid_t) -1) { |
581 | if (gid_eq(old->gid, krgid) || | 581 | if (gid_eq(old->gid, krgid) || |
582 | gid_eq(old->egid, krgid) || | 582 | gid_eq(old->egid, krgid) || |
583 | nsown_capable(CAP_SETGID)) | 583 | nsown_capable(CAP_SETGID)) |
584 | new->gid = krgid; | 584 | new->gid = krgid; |
585 | else | 585 | else |
586 | goto error; | 586 | goto error; |
587 | } | 587 | } |
588 | if (egid != (gid_t) -1) { | 588 | if (egid != (gid_t) -1) { |
589 | if (gid_eq(old->gid, kegid) || | 589 | if (gid_eq(old->gid, kegid) || |
590 | gid_eq(old->egid, kegid) || | 590 | gid_eq(old->egid, kegid) || |
591 | gid_eq(old->sgid, kegid) || | 591 | gid_eq(old->sgid, kegid) || |
592 | nsown_capable(CAP_SETGID)) | 592 | nsown_capable(CAP_SETGID)) |
593 | new->egid = kegid; | 593 | new->egid = kegid; |
594 | else | 594 | else |
595 | goto error; | 595 | goto error; |
596 | } | 596 | } |
597 | 597 | ||
598 | if (rgid != (gid_t) -1 || | 598 | if (rgid != (gid_t) -1 || |
599 | (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) | 599 | (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) |
600 | new->sgid = new->egid; | 600 | new->sgid = new->egid; |
601 | new->fsgid = new->egid; | 601 | new->fsgid = new->egid; |
602 | 602 | ||
603 | return commit_creds(new); | 603 | return commit_creds(new); |
604 | 604 | ||
605 | error: | 605 | error: |
606 | abort_creds(new); | 606 | abort_creds(new); |
607 | return retval; | 607 | return retval; |
608 | } | 608 | } |
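
Per the rules spelled out above setregid(), a setgid program can irreversibly drop its elevated group by setting both the real and effective gid to the real gid: because the real gid is being set, the saved gid follows the new effective gid. A hypothetical userspace sketch:

#include <unistd.h>
#include <stdio.h>

int main(void)
{
	gid_t rgid = getgid();

	/* Setting the real gid at all makes the saved gid track the new egid,
	 * so the original (elevated) effective gid cannot be regained later. */
	if (setregid(rgid, rgid) == -1) {
		perror("setregid");
		return 1;
	}
	printf("gid=%ld egid=%ld\n", (long)getgid(), (long)getegid());
	return 0;
}
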
609 | 609 | ||
610 | /* | 610 | /* |
611 | * setgid() is implemented like SysV w/ SAVED_IDS | 611 | * setgid() is implemented like SysV w/ SAVED_IDS |
612 | * | 612 | * |
613 | * SMP: Same implicit races as above. | 613 | * SMP: Same implicit races as above. |
614 | */ | 614 | */ |
615 | SYSCALL_DEFINE1(setgid, gid_t, gid) | 615 | SYSCALL_DEFINE1(setgid, gid_t, gid) |
616 | { | 616 | { |
617 | struct user_namespace *ns = current_user_ns(); | 617 | struct user_namespace *ns = current_user_ns(); |
618 | const struct cred *old; | 618 | const struct cred *old; |
619 | struct cred *new; | 619 | struct cred *new; |
620 | int retval; | 620 | int retval; |
621 | kgid_t kgid; | 621 | kgid_t kgid; |
622 | 622 | ||
623 | kgid = make_kgid(ns, gid); | 623 | kgid = make_kgid(ns, gid); |
624 | if (!gid_valid(kgid)) | 624 | if (!gid_valid(kgid)) |
625 | return -EINVAL; | 625 | return -EINVAL; |
626 | 626 | ||
627 | new = prepare_creds(); | 627 | new = prepare_creds(); |
628 | if (!new) | 628 | if (!new) |
629 | return -ENOMEM; | 629 | return -ENOMEM; |
630 | old = current_cred(); | 630 | old = current_cred(); |
631 | 631 | ||
632 | retval = -EPERM; | 632 | retval = -EPERM; |
633 | if (nsown_capable(CAP_SETGID)) | 633 | if (nsown_capable(CAP_SETGID)) |
634 | new->gid = new->egid = new->sgid = new->fsgid = kgid; | 634 | new->gid = new->egid = new->sgid = new->fsgid = kgid; |
635 | else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) | 635 | else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) |
636 | new->egid = new->fsgid = kgid; | 636 | new->egid = new->fsgid = kgid; |
637 | else | 637 | else |
638 | goto error; | 638 | goto error; |
639 | 639 | ||
640 | return commit_creds(new); | 640 | return commit_creds(new); |
641 | 641 | ||
642 | error: | 642 | error: |
643 | abort_creds(new); | 643 | abort_creds(new); |
644 | return retval; | 644 | return retval; |
645 | } | 645 | } |
646 | 646 | ||
647 | /* | 647 | /* |
648 | * change the user struct in a credentials set to match the new UID | 648 | * change the user struct in a credentials set to match the new UID |
649 | */ | 649 | */ |
650 | static int set_user(struct cred *new) | 650 | static int set_user(struct cred *new) |
651 | { | 651 | { |
652 | struct user_struct *new_user; | 652 | struct user_struct *new_user; |
653 | 653 | ||
654 | new_user = alloc_uid(new->uid); | 654 | new_user = alloc_uid(new->uid); |
655 | if (!new_user) | 655 | if (!new_user) |
656 | return -EAGAIN; | 656 | return -EAGAIN; |
657 | 657 | ||
658 | /* | 658 | /* |
659 | * We don't fail in case of NPROC limit excess here because too many | 659 | * We don't fail in case of NPROC limit excess here because too many |
660 | * poorly written programs don't check set*uid() return code, assuming | 660 | * poorly written programs don't check set*uid() return code, assuming |
661 | * it never fails if called by root. We may still enforce NPROC limit | 661 | * it never fails if called by root. We may still enforce NPROC limit |
662 | * for programs doing set*uid()+execve() by harmlessly deferring the | 662 | * for programs doing set*uid()+execve() by harmlessly deferring the |
663 | * failure to the execve() stage. | 663 | * failure to the execve() stage. |
664 | */ | 664 | */ |
665 | if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && | 665 | if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && |
666 | new_user != INIT_USER) | 666 | new_user != INIT_USER) |
667 | current->flags |= PF_NPROC_EXCEEDED; | 667 | current->flags |= PF_NPROC_EXCEEDED; |
668 | else | 668 | else |
669 | current->flags &= ~PF_NPROC_EXCEEDED; | 669 | current->flags &= ~PF_NPROC_EXCEEDED; |
670 | 670 | ||
671 | free_uid(new->user); | 671 | free_uid(new->user); |
672 | new->user = new_user; | 672 | new->user = new_user; |
673 | return 0; | 673 | return 0; |
674 | } | 674 | } |
675 | 675 | ||
676 | /* | 676 | /* |
677 | * Unprivileged users may change the real uid to the effective uid | 677 | * Unprivileged users may change the real uid to the effective uid |
678 | * or vice versa. (BSD-style) | 678 | * or vice versa. (BSD-style) |
679 | * | 679 | * |
680 | * If you set the real uid at all, or set the effective uid to a value not | 680 | * If you set the real uid at all, or set the effective uid to a value not |
681 | * equal to the real uid, then the saved uid is set to the new effective uid. | 681 | * equal to the real uid, then the saved uid is set to the new effective uid. |
682 | * | 682 | * |
683 | * This makes it possible for a setuid program to completely drop its | 683 | * This makes it possible for a setuid program to completely drop its |
684 | * privileges, which is often a useful assertion to make when you are doing | 684 | * privileges, which is often a useful assertion to make when you are doing |
685 | * a security audit over a program. | 685 | * a security audit over a program. |
686 | * | 686 | * |
687 | * The general idea is that a program which uses just setreuid() will be | 687 | * The general idea is that a program which uses just setreuid() will be |
688 | * 100% compatible with BSD. A program which uses just setuid() will be | 688 | * 100% compatible with BSD. A program which uses just setuid() will be |
689 | * 100% compatible with POSIX with saved IDs. | 689 | * 100% compatible with POSIX with saved IDs. |
690 | */ | 690 | */ |
691 | SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | 691 | SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) |
692 | { | 692 | { |
693 | struct user_namespace *ns = current_user_ns(); | 693 | struct user_namespace *ns = current_user_ns(); |
694 | const struct cred *old; | 694 | const struct cred *old; |
695 | struct cred *new; | 695 | struct cred *new; |
696 | int retval; | 696 | int retval; |
697 | kuid_t kruid, keuid; | 697 | kuid_t kruid, keuid; |
698 | 698 | ||
699 | kruid = make_kuid(ns, ruid); | 699 | kruid = make_kuid(ns, ruid); |
700 | keuid = make_kuid(ns, euid); | 700 | keuid = make_kuid(ns, euid); |
701 | 701 | ||
702 | if ((ruid != (uid_t) -1) && !uid_valid(kruid)) | 702 | if ((ruid != (uid_t) -1) && !uid_valid(kruid)) |
703 | return -EINVAL; | 703 | return -EINVAL; |
704 | if ((euid != (uid_t) -1) && !uid_valid(keuid)) | 704 | if ((euid != (uid_t) -1) && !uid_valid(keuid)) |
705 | return -EINVAL; | 705 | return -EINVAL; |
706 | 706 | ||
707 | new = prepare_creds(); | 707 | new = prepare_creds(); |
708 | if (!new) | 708 | if (!new) |
709 | return -ENOMEM; | 709 | return -ENOMEM; |
710 | old = current_cred(); | 710 | old = current_cred(); |
711 | 711 | ||
712 | retval = -EPERM; | 712 | retval = -EPERM; |
713 | if (ruid != (uid_t) -1) { | 713 | if (ruid != (uid_t) -1) { |
714 | new->uid = kruid; | 714 | new->uid = kruid; |
715 | if (!uid_eq(old->uid, kruid) && | 715 | if (!uid_eq(old->uid, kruid) && |
716 | !uid_eq(old->euid, kruid) && | 716 | !uid_eq(old->euid, kruid) && |
717 | !nsown_capable(CAP_SETUID)) | 717 | !nsown_capable(CAP_SETUID)) |
718 | goto error; | 718 | goto error; |
719 | } | 719 | } |
720 | 720 | ||
721 | if (euid != (uid_t) -1) { | 721 | if (euid != (uid_t) -1) { |
722 | new->euid = keuid; | 722 | new->euid = keuid; |
723 | if (!uid_eq(old->uid, keuid) && | 723 | if (!uid_eq(old->uid, keuid) && |
724 | !uid_eq(old->euid, keuid) && | 724 | !uid_eq(old->euid, keuid) && |
725 | !uid_eq(old->suid, keuid) && | 725 | !uid_eq(old->suid, keuid) && |
726 | !nsown_capable(CAP_SETUID)) | 726 | !nsown_capable(CAP_SETUID)) |
727 | goto error; | 727 | goto error; |
728 | } | 728 | } |
729 | 729 | ||
730 | if (!uid_eq(new->uid, old->uid)) { | 730 | if (!uid_eq(new->uid, old->uid)) { |
731 | retval = set_user(new); | 731 | retval = set_user(new); |
732 | if (retval < 0) | 732 | if (retval < 0) |
733 | goto error; | 733 | goto error; |
734 | } | 734 | } |
735 | if (ruid != (uid_t) -1 || | 735 | if (ruid != (uid_t) -1 || |
736 | (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) | 736 | (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) |
737 | new->suid = new->euid; | 737 | new->suid = new->euid; |
738 | new->fsuid = new->euid; | 738 | new->fsuid = new->euid; |
739 | 739 | ||
740 | retval = security_task_fix_setuid(new, old, LSM_SETID_RE); | 740 | retval = security_task_fix_setuid(new, old, LSM_SETID_RE); |
741 | if (retval < 0) | 741 | if (retval < 0) |
742 | goto error; | 742 | goto error; |
743 | 743 | ||
744 | return commit_creds(new); | 744 | return commit_creds(new); |
745 | 745 | ||
746 | error: | 746 | error: |
747 | abort_creds(new); | 747 | abort_creds(new); |
748 | return retval; | 748 | return retval; |
749 | } | 749 | } |
750 | 750 | ||
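As a hedged illustration of the BSD semantics described in the comment above setreuid(): a setuid program can shed its elevated identity for good by setting both the real and effective uid to the invoking user, which (because the real uid is being set) also rewrites the saved uid. This is a userspace sketch, not part of the kernel source in this diff:

    /* Minimal sketch: permanently drop privileges in a setuid program.
     * Because ruid is passed explicitly, the saved uid is set to the new
     * effective uid as well, so the old identity cannot be regained. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            uid_t ruid = getuid();          /* the invoking user */

            if (setreuid(ruid, ruid) == -1) {
                    perror("setreuid");
                    exit(EXIT_FAILURE);
            }
            printf("uid=%d euid=%d\n", (int)getuid(), (int)geteuid());
            return 0;
    }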
751 | /* | 751 | /* |
752 | * setuid() is implemented like SysV with SAVED_IDS | 752 | * setuid() is implemented like SysV with SAVED_IDS |
753 | * | 753 | * |
754 | * Note that SAVED_IDS is deficient in that a setuid root program | 754 | * Note that SAVED_IDS is deficient in that a setuid root program |
755 | * like sendmail, for example, cannot set its uid to be a normal | 755 | * like sendmail, for example, cannot set its uid to be a normal |
756 | * user and then switch back, because if you're root, setuid() sets | 756 | * user and then switch back, because if you're root, setuid() sets |
757 | * the saved uid too. If you don't like this, blame the bright people | 757 | * the saved uid too. If you don't like this, blame the bright people |
758 | * in the POSIX committee and/or USG. Note that the BSD-style setreuid() | 758 | * in the POSIX committee and/or USG. Note that the BSD-style setreuid() |
759 | * will allow a root program to temporarily drop privileges and be able to | 759 | * will allow a root program to temporarily drop privileges and be able to |
760 | * regain them by swapping the real and effective uid. | 760 | * regain them by swapping the real and effective uid. |
761 | */ | 761 | */ |
762 | SYSCALL_DEFINE1(setuid, uid_t, uid) | 762 | SYSCALL_DEFINE1(setuid, uid_t, uid) |
763 | { | 763 | { |
764 | struct user_namespace *ns = current_user_ns(); | 764 | struct user_namespace *ns = current_user_ns(); |
765 | const struct cred *old; | 765 | const struct cred *old; |
766 | struct cred *new; | 766 | struct cred *new; |
767 | int retval; | 767 | int retval; |
768 | kuid_t kuid; | 768 | kuid_t kuid; |
769 | 769 | ||
770 | kuid = make_kuid(ns, uid); | 770 | kuid = make_kuid(ns, uid); |
771 | if (!uid_valid(kuid)) | 771 | if (!uid_valid(kuid)) |
772 | return -EINVAL; | 772 | return -EINVAL; |
773 | 773 | ||
774 | new = prepare_creds(); | 774 | new = prepare_creds(); |
775 | if (!new) | 775 | if (!new) |
776 | return -ENOMEM; | 776 | return -ENOMEM; |
777 | old = current_cred(); | 777 | old = current_cred(); |
778 | 778 | ||
779 | retval = -EPERM; | 779 | retval = -EPERM; |
780 | if (nsown_capable(CAP_SETUID)) { | 780 | if (nsown_capable(CAP_SETUID)) { |
781 | new->suid = new->uid = kuid; | 781 | new->suid = new->uid = kuid; |
782 | if (!uid_eq(kuid, old->uid)) { | 782 | if (!uid_eq(kuid, old->uid)) { |
783 | retval = set_user(new); | 783 | retval = set_user(new); |
784 | if (retval < 0) | 784 | if (retval < 0) |
785 | goto error; | 785 | goto error; |
786 | } | 786 | } |
787 | } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { | 787 | } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { |
788 | goto error; | 788 | goto error; |
789 | } | 789 | } |
790 | 790 | ||
791 | new->fsuid = new->euid = kuid; | 791 | new->fsuid = new->euid = kuid; |
792 | 792 | ||
793 | retval = security_task_fix_setuid(new, old, LSM_SETID_ID); | 793 | retval = security_task_fix_setuid(new, old, LSM_SETID_ID); |
794 | if (retval < 0) | 794 | if (retval < 0) |
795 | goto error; | 795 | goto error; |
796 | 796 | ||
797 | return commit_creds(new); | 797 | return commit_creds(new); |
798 | 798 | ||
799 | error: | 799 | error: |
800 | abort_creds(new); | 800 | abort_creds(new); |
801 | return retval; | 801 | return retval; |
802 | } | 802 | } |
803 | 803 | ||
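The SAVED_IDS caveat in the comment above setuid() is easiest to see from userspace. A hedged sketch (it assumes a root caller; uid 1000 is a made-up example) contrasting a temporary drop via seteuid() with the one-way setuid():

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            if (geteuid() != 0)
                    return 1;               /* sketch assumes a root caller */

            seteuid(1000);                  /* temporary: saved uid stays 0 */
            printf("euid %d\n", (int)geteuid());
            seteuid(0);                     /* allowed, saved uid is still 0 */

            setuid(1000);                   /* SysV: real, effective and saved uid all change */
            /* setuid(0) would now fail with EPERM */
            return 0;
    }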
804 | 804 | ||
805 | /* | 805 | /* |
806 | * This function implements a generic ability to update ruid, euid, | 806 | * This function implements a generic ability to update ruid, euid, |
807 | * and suid. This allows you to implement the 4.4 compatible seteuid(). | 807 | * and suid. This allows you to implement the 4.4 compatible seteuid(). |
808 | */ | 808 | */ |
809 | SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) | 809 | SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) |
810 | { | 810 | { |
811 | struct user_namespace *ns = current_user_ns(); | 811 | struct user_namespace *ns = current_user_ns(); |
812 | const struct cred *old; | 812 | const struct cred *old; |
813 | struct cred *new; | 813 | struct cred *new; |
814 | int retval; | 814 | int retval; |
815 | kuid_t kruid, keuid, ksuid; | 815 | kuid_t kruid, keuid, ksuid; |
816 | 816 | ||
817 | kruid = make_kuid(ns, ruid); | 817 | kruid = make_kuid(ns, ruid); |
818 | keuid = make_kuid(ns, euid); | 818 | keuid = make_kuid(ns, euid); |
819 | ksuid = make_kuid(ns, suid); | 819 | ksuid = make_kuid(ns, suid); |
820 | 820 | ||
821 | if ((ruid != (uid_t) -1) && !uid_valid(kruid)) | 821 | if ((ruid != (uid_t) -1) && !uid_valid(kruid)) |
822 | return -EINVAL; | 822 | return -EINVAL; |
823 | 823 | ||
824 | if ((euid != (uid_t) -1) && !uid_valid(keuid)) | 824 | if ((euid != (uid_t) -1) && !uid_valid(keuid)) |
825 | return -EINVAL; | 825 | return -EINVAL; |
826 | 826 | ||
827 | if ((suid != (uid_t) -1) && !uid_valid(ksuid)) | 827 | if ((suid != (uid_t) -1) && !uid_valid(ksuid)) |
828 | return -EINVAL; | 828 | return -EINVAL; |
829 | 829 | ||
830 | new = prepare_creds(); | 830 | new = prepare_creds(); |
831 | if (!new) | 831 | if (!new) |
832 | return -ENOMEM; | 832 | return -ENOMEM; |
833 | 833 | ||
834 | old = current_cred(); | 834 | old = current_cred(); |
835 | 835 | ||
836 | retval = -EPERM; | 836 | retval = -EPERM; |
837 | if (!nsown_capable(CAP_SETUID)) { | 837 | if (!nsown_capable(CAP_SETUID)) { |
838 | if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && | 838 | if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && |
839 | !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) | 839 | !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) |
840 | goto error; | 840 | goto error; |
841 | if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && | 841 | if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && |
842 | !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) | 842 | !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) |
843 | goto error; | 843 | goto error; |
844 | if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && | 844 | if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && |
845 | !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) | 845 | !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) |
846 | goto error; | 846 | goto error; |
847 | } | 847 | } |
848 | 848 | ||
849 | if (ruid != (uid_t) -1) { | 849 | if (ruid != (uid_t) -1) { |
850 | new->uid = kruid; | 850 | new->uid = kruid; |
851 | if (!uid_eq(kruid, old->uid)) { | 851 | if (!uid_eq(kruid, old->uid)) { |
852 | retval = set_user(new); | 852 | retval = set_user(new); |
853 | if (retval < 0) | 853 | if (retval < 0) |
854 | goto error; | 854 | goto error; |
855 | } | 855 | } |
856 | } | 856 | } |
857 | if (euid != (uid_t) -1) | 857 | if (euid != (uid_t) -1) |
858 | new->euid = keuid; | 858 | new->euid = keuid; |
859 | if (suid != (uid_t) -1) | 859 | if (suid != (uid_t) -1) |
860 | new->suid = ksuid; | 860 | new->suid = ksuid; |
861 | new->fsuid = new->euid; | 861 | new->fsuid = new->euid; |
862 | 862 | ||
863 | retval = security_task_fix_setuid(new, old, LSM_SETID_RES); | 863 | retval = security_task_fix_setuid(new, old, LSM_SETID_RES); |
864 | if (retval < 0) | 864 | if (retval < 0) |
865 | goto error; | 865 | goto error; |
866 | 866 | ||
867 | return commit_creds(new); | 867 | return commit_creds(new); |
868 | 868 | ||
869 | error: | 869 | error: |
870 | abort_creds(new); | 870 | abort_creds(new); |
871 | return retval; | 871 | return retval; |
872 | } | 872 | } |
873 | 873 | ||
874 | SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) | 874 | SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) |
875 | { | 875 | { |
876 | const struct cred *cred = current_cred(); | 876 | const struct cred *cred = current_cred(); |
877 | int retval; | 877 | int retval; |
878 | uid_t ruid, euid, suid; | 878 | uid_t ruid, euid, suid; |
879 | 879 | ||
880 | ruid = from_kuid_munged(cred->user_ns, cred->uid); | 880 | ruid = from_kuid_munged(cred->user_ns, cred->uid); |
881 | euid = from_kuid_munged(cred->user_ns, cred->euid); | 881 | euid = from_kuid_munged(cred->user_ns, cred->euid); |
882 | suid = from_kuid_munged(cred->user_ns, cred->suid); | 882 | suid = from_kuid_munged(cred->user_ns, cred->suid); |
883 | 883 | ||
884 | if (!(retval = put_user(ruid, ruidp)) && | 884 | if (!(retval = put_user(ruid, ruidp)) && |
885 | !(retval = put_user(euid, euidp))) | 885 | !(retval = put_user(euid, euidp))) |
886 | retval = put_user(suid, suidp); | 886 | retval = put_user(suid, suidp); |
887 | 887 | ||
888 | return retval; | 888 | return retval; |
889 | } | 889 | } |
890 | 890 | ||
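The comment above setresuid() notes that it is general enough to build the 4.4BSD seteuid() on top of. A hedged userspace sketch of that construction (my_seteuid is a hypothetical helper name), plus reading the three ids back with getresuid():

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>

    /* seteuid() expressed via setresuid(): leave real and saved uids alone. */
    static int my_seteuid(uid_t euid)
    {
            return setresuid((uid_t)-1, euid, (uid_t)-1);
    }

    int main(void)
    {
            uid_t r, e, s;

            my_seteuid(getuid());
            getresuid(&r, &e, &s);
            printf("ruid=%d euid=%d suid=%d\n", (int)r, (int)e, (int)s);
            return 0;
    }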
891 | /* | 891 | /* |
892 | * Same as above, but for rgid, egid, sgid. | 892 | * Same as above, but for rgid, egid, sgid. |
893 | */ | 893 | */ |
894 | SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) | 894 | SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) |
895 | { | 895 | { |
896 | struct user_namespace *ns = current_user_ns(); | 896 | struct user_namespace *ns = current_user_ns(); |
897 | const struct cred *old; | 897 | const struct cred *old; |
898 | struct cred *new; | 898 | struct cred *new; |
899 | int retval; | 899 | int retval; |
900 | kgid_t krgid, kegid, ksgid; | 900 | kgid_t krgid, kegid, ksgid; |
901 | 901 | ||
902 | krgid = make_kgid(ns, rgid); | 902 | krgid = make_kgid(ns, rgid); |
903 | kegid = make_kgid(ns, egid); | 903 | kegid = make_kgid(ns, egid); |
904 | ksgid = make_kgid(ns, sgid); | 904 | ksgid = make_kgid(ns, sgid); |
905 | 905 | ||
906 | if ((rgid != (gid_t) -1) && !gid_valid(krgid)) | 906 | if ((rgid != (gid_t) -1) && !gid_valid(krgid)) |
907 | return -EINVAL; | 907 | return -EINVAL; |
908 | if ((egid != (gid_t) -1) && !gid_valid(kegid)) | 908 | if ((egid != (gid_t) -1) && !gid_valid(kegid)) |
909 | return -EINVAL; | 909 | return -EINVAL; |
910 | if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) | 910 | if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) |
911 | return -EINVAL; | 911 | return -EINVAL; |
912 | 912 | ||
913 | new = prepare_creds(); | 913 | new = prepare_creds(); |
914 | if (!new) | 914 | if (!new) |
915 | return -ENOMEM; | 915 | return -ENOMEM; |
916 | old = current_cred(); | 916 | old = current_cred(); |
917 | 917 | ||
918 | retval = -EPERM; | 918 | retval = -EPERM; |
919 | if (!nsown_capable(CAP_SETGID)) { | 919 | if (!nsown_capable(CAP_SETGID)) { |
920 | if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && | 920 | if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && |
921 | !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) | 921 | !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) |
922 | goto error; | 922 | goto error; |
923 | if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && | 923 | if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && |
924 | !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) | 924 | !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) |
925 | goto error; | 925 | goto error; |
926 | if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && | 926 | if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && |
927 | !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) | 927 | !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) |
928 | goto error; | 928 | goto error; |
929 | } | 929 | } |
930 | 930 | ||
931 | if (rgid != (gid_t) -1) | 931 | if (rgid != (gid_t) -1) |
932 | new->gid = krgid; | 932 | new->gid = krgid; |
933 | if (egid != (gid_t) -1) | 933 | if (egid != (gid_t) -1) |
934 | new->egid = kegid; | 934 | new->egid = kegid; |
935 | if (sgid != (gid_t) -1) | 935 | if (sgid != (gid_t) -1) |
936 | new->sgid = ksgid; | 936 | new->sgid = ksgid; |
937 | new->fsgid = new->egid; | 937 | new->fsgid = new->egid; |
938 | 938 | ||
939 | return commit_creds(new); | 939 | return commit_creds(new); |
940 | 940 | ||
941 | error: | 941 | error: |
942 | abort_creds(new); | 942 | abort_creds(new); |
943 | return retval; | 943 | return retval; |
944 | } | 944 | } |
945 | 945 | ||
946 | SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) | 946 | SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) |
947 | { | 947 | { |
948 | const struct cred *cred = current_cred(); | 948 | const struct cred *cred = current_cred(); |
949 | int retval; | 949 | int retval; |
950 | gid_t rgid, egid, sgid; | 950 | gid_t rgid, egid, sgid; |
951 | 951 | ||
952 | rgid = from_kgid_munged(cred->user_ns, cred->gid); | 952 | rgid = from_kgid_munged(cred->user_ns, cred->gid); |
953 | egid = from_kgid_munged(cred->user_ns, cred->egid); | 953 | egid = from_kgid_munged(cred->user_ns, cred->egid); |
954 | sgid = from_kgid_munged(cred->user_ns, cred->sgid); | 954 | sgid = from_kgid_munged(cred->user_ns, cred->sgid); |
955 | 955 | ||
956 | if (!(retval = put_user(rgid, rgidp)) && | 956 | if (!(retval = put_user(rgid, rgidp)) && |
957 | !(retval = put_user(egid, egidp))) | 957 | !(retval = put_user(egid, egidp))) |
958 | retval = put_user(sgid, sgidp); | 958 | retval = put_user(sgid, sgidp); |
959 | 959 | ||
960 | return retval; | 960 | return retval; |
961 | } | 961 | } |
962 | 962 | ||
963 | 963 | ||
964 | /* | 964 | /* |
965 | * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This | 965 | * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This |
966 | * is used for "access()" and for the NFS daemon (letting nfsd stay at | 966 | * is used for "access()" and for the NFS daemon (letting nfsd stay at |
967 | * whatever uid it wants to). It normally shadows "euid", except when | 967 | * whatever uid it wants to). It normally shadows "euid", except when |
968 | * explicitly set by setfsuid() or for access.. | 968 | * explicitly set by setfsuid() or for access.. |
969 | */ | 969 | */ |
970 | SYSCALL_DEFINE1(setfsuid, uid_t, uid) | 970 | SYSCALL_DEFINE1(setfsuid, uid_t, uid) |
971 | { | 971 | { |
972 | const struct cred *old; | 972 | const struct cred *old; |
973 | struct cred *new; | 973 | struct cred *new; |
974 | uid_t old_fsuid; | 974 | uid_t old_fsuid; |
975 | kuid_t kuid; | 975 | kuid_t kuid; |
976 | 976 | ||
977 | old = current_cred(); | 977 | old = current_cred(); |
978 | old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); | 978 | old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); |
979 | 979 | ||
980 | kuid = make_kuid(old->user_ns, uid); | 980 | kuid = make_kuid(old->user_ns, uid); |
981 | if (!uid_valid(kuid)) | 981 | if (!uid_valid(kuid)) |
982 | return old_fsuid; | 982 | return old_fsuid; |
983 | 983 | ||
984 | new = prepare_creds(); | 984 | new = prepare_creds(); |
985 | if (!new) | 985 | if (!new) |
986 | return old_fsuid; | 986 | return old_fsuid; |
987 | 987 | ||
988 | if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || | 988 | if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || |
989 | uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || | 989 | uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || |
990 | nsown_capable(CAP_SETUID)) { | 990 | nsown_capable(CAP_SETUID)) { |
991 | if (!uid_eq(kuid, old->fsuid)) { | 991 | if (!uid_eq(kuid, old->fsuid)) { |
992 | new->fsuid = kuid; | 992 | new->fsuid = kuid; |
993 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) | 993 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) |
994 | goto change_okay; | 994 | goto change_okay; |
995 | } | 995 | } |
996 | } | 996 | } |
997 | 997 | ||
998 | abort_creds(new); | 998 | abort_creds(new); |
999 | return old_fsuid; | 999 | return old_fsuid; |
1000 | 1000 | ||
1001 | change_okay: | 1001 | change_okay: |
1002 | commit_creds(new); | 1002 | commit_creds(new); |
1003 | return old_fsuid; | 1003 | return old_fsuid; |
1004 | } | 1004 | } |
1005 | 1005 | ||
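As the comment above setfsuid() says, the fsuid only affects filesystem permission checks (nfsd being the classic user). A hedged userspace sketch follows; the syscall always returns the previous fsuid, so the usual idiom for reading it back is a second call with an invalid uid, which is exactly the !uid_valid() early-return path in the kernel code above:

    #include <stdio.h>
    #include <sys/fsuid.h>

    int main(void)
    {
            int prev = setfsuid(1000);      /* hypothetical target uid */
            int now  = setfsuid(-1);        /* invalid uid: no change, just read back */

            printf("fsuid was %d, is now %d\n", prev, now);
            return 0;
    }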
1006 | /* | 1006 | /* |
1007 | * Samma på svenska.. | 1007 | * Samma på svenska.. |
1008 | */ | 1008 | */ |
1009 | SYSCALL_DEFINE1(setfsgid, gid_t, gid) | 1009 | SYSCALL_DEFINE1(setfsgid, gid_t, gid) |
1010 | { | 1010 | { |
1011 | const struct cred *old; | 1011 | const struct cred *old; |
1012 | struct cred *new; | 1012 | struct cred *new; |
1013 | gid_t old_fsgid; | 1013 | gid_t old_fsgid; |
1014 | kgid_t kgid; | 1014 | kgid_t kgid; |
1015 | 1015 | ||
1016 | old = current_cred(); | 1016 | old = current_cred(); |
1017 | old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); | 1017 | old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); |
1018 | 1018 | ||
1019 | kgid = make_kgid(old->user_ns, gid); | 1019 | kgid = make_kgid(old->user_ns, gid); |
1020 | if (!gid_valid(kgid)) | 1020 | if (!gid_valid(kgid)) |
1021 | return old_fsgid; | 1021 | return old_fsgid; |
1022 | 1022 | ||
1023 | new = prepare_creds(); | 1023 | new = prepare_creds(); |
1024 | if (!new) | 1024 | if (!new) |
1025 | return old_fsgid; | 1025 | return old_fsgid; |
1026 | 1026 | ||
1027 | if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || | 1027 | if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || |
1028 | gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || | 1028 | gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || |
1029 | nsown_capable(CAP_SETGID)) { | 1029 | nsown_capable(CAP_SETGID)) { |
1030 | if (!gid_eq(kgid, old->fsgid)) { | 1030 | if (!gid_eq(kgid, old->fsgid)) { |
1031 | new->fsgid = kgid; | 1031 | new->fsgid = kgid; |
1032 | goto change_okay; | 1032 | goto change_okay; |
1033 | } | 1033 | } |
1034 | } | 1034 | } |
1035 | 1035 | ||
1036 | abort_creds(new); | 1036 | abort_creds(new); |
1037 | return old_fsgid; | 1037 | return old_fsgid; |
1038 | 1038 | ||
1039 | change_okay: | 1039 | change_okay: |
1040 | commit_creds(new); | 1040 | commit_creds(new); |
1041 | return old_fsgid; | 1041 | return old_fsgid; |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | void do_sys_times(struct tms *tms) | 1044 | void do_sys_times(struct tms *tms) |
1045 | { | 1045 | { |
1046 | cputime_t tgutime, tgstime, cutime, cstime; | 1046 | cputime_t tgutime, tgstime, cutime, cstime; |
1047 | 1047 | ||
1048 | spin_lock_irq(¤t->sighand->siglock); | 1048 | spin_lock_irq(¤t->sighand->siglock); |
1049 | thread_group_times(current, &tgutime, &tgstime); | 1049 | thread_group_cputime_adjusted(current, &tgutime, &tgstime); |
1050 | cutime = current->signal->cutime; | 1050 | cutime = current->signal->cutime; |
1051 | cstime = current->signal->cstime; | 1051 | cstime = current->signal->cstime; |
1052 | spin_unlock_irq(¤t->sighand->siglock); | 1052 | spin_unlock_irq(¤t->sighand->siglock); |
1053 | tms->tms_utime = cputime_to_clock_t(tgutime); | 1053 | tms->tms_utime = cputime_to_clock_t(tgutime); |
1054 | tms->tms_stime = cputime_to_clock_t(tgstime); | 1054 | tms->tms_stime = cputime_to_clock_t(tgstime); |
1055 | tms->tms_cutime = cputime_to_clock_t(cutime); | 1055 | tms->tms_cutime = cputime_to_clock_t(cutime); |
1056 | tms->tms_cstime = cputime_to_clock_t(cstime); | 1056 | tms->tms_cstime = cputime_to_clock_t(cstime); |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | SYSCALL_DEFINE1(times, struct tms __user *, tbuf) | 1059 | SYSCALL_DEFINE1(times, struct tms __user *, tbuf) |
1060 | { | 1060 | { |
1061 | if (tbuf) { | 1061 | if (tbuf) { |
1062 | struct tms tmp; | 1062 | struct tms tmp; |
1063 | 1063 | ||
1064 | do_sys_times(&tmp); | 1064 | do_sys_times(&tmp); |
1065 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) | 1065 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) |
1066 | return -EFAULT; | 1066 | return -EFAULT; |
1067 | } | 1067 | } |
1068 | force_successful_syscall_return(); | 1068 | force_successful_syscall_return(); |
1069 | return (long) jiffies_64_to_clock_t(get_jiffies_64()); | 1069 | return (long) jiffies_64_to_clock_t(get_jiffies_64()); |
1070 | } | 1070 | } |
1071 | 1071 | ||
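do_sys_times() is one of the call sites this commit switches over to thread_group_cputime_adjusted(); the adjusted values end up in the tms fields returned by times(2). A minimal userspace reader, converting clock ticks via sysconf(_SC_CLK_TCK):

    #include <stdio.h>
    #include <sys/times.h>
    #include <unistd.h>

    int main(void)
    {
            struct tms t;
            long hz = sysconf(_SC_CLK_TCK);

            if (times(&t) == (clock_t)-1)
                    return 1;
            printf("user %.2fs sys %.2fs\n",
                   (double)t.tms_utime / hz, (double)t.tms_stime / hz);
            return 0;
    }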
1072 | /* | 1072 | /* |
1073 | * This needs some heavy checking ... | 1073 | * This needs some heavy checking ... |
1074 | * I just haven't the stomach for it. I also don't fully | 1074 | * I just haven't the stomach for it. I also don't fully |
1075 | * understand sessions/pgrp etc. Let somebody who does explain it. | 1075 | * understand sessions/pgrp etc. Let somebody who does explain it. |
1076 | * | 1076 | * |
1077 | * OK, I think I have the protection semantics right.... this is really | 1077 | * OK, I think I have the protection semantics right.... this is really |
1078 | * only important on a multi-user system anyway, to make sure one user | 1078 | * only important on a multi-user system anyway, to make sure one user |
1079 | * can't send a signal to a process owned by another. -TYT, 12/12/91 | 1079 | * can't send a signal to a process owned by another. -TYT, 12/12/91 |
1080 | * | 1080 | * |
1081 | * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. | 1081 | * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. |
1082 | * LBT 04.03.94 | 1082 | * LBT 04.03.94 |
1083 | */ | 1083 | */ |
1084 | SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) | 1084 | SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) |
1085 | { | 1085 | { |
1086 | struct task_struct *p; | 1086 | struct task_struct *p; |
1087 | struct task_struct *group_leader = current->group_leader; | 1087 | struct task_struct *group_leader = current->group_leader; |
1088 | struct pid *pgrp; | 1088 | struct pid *pgrp; |
1089 | int err; | 1089 | int err; |
1090 | 1090 | ||
1091 | if (!pid) | 1091 | if (!pid) |
1092 | pid = task_pid_vnr(group_leader); | 1092 | pid = task_pid_vnr(group_leader); |
1093 | if (!pgid) | 1093 | if (!pgid) |
1094 | pgid = pid; | 1094 | pgid = pid; |
1095 | if (pgid < 0) | 1095 | if (pgid < 0) |
1096 | return -EINVAL; | 1096 | return -EINVAL; |
1097 | rcu_read_lock(); | 1097 | rcu_read_lock(); |
1098 | 1098 | ||
1099 | /* From this point forward we keep holding onto the tasklist lock | 1099 | /* From this point forward we keep holding onto the tasklist lock |
1100 | * so that our parent does not change from under us. -DaveM | 1100 | * so that our parent does not change from under us. -DaveM |
1101 | */ | 1101 | */ |
1102 | write_lock_irq(&tasklist_lock); | 1102 | write_lock_irq(&tasklist_lock); |
1103 | 1103 | ||
1104 | err = -ESRCH; | 1104 | err = -ESRCH; |
1105 | p = find_task_by_vpid(pid); | 1105 | p = find_task_by_vpid(pid); |
1106 | if (!p) | 1106 | if (!p) |
1107 | goto out; | 1107 | goto out; |
1108 | 1108 | ||
1109 | err = -EINVAL; | 1109 | err = -EINVAL; |
1110 | if (!thread_group_leader(p)) | 1110 | if (!thread_group_leader(p)) |
1111 | goto out; | 1111 | goto out; |
1112 | 1112 | ||
1113 | if (same_thread_group(p->real_parent, group_leader)) { | 1113 | if (same_thread_group(p->real_parent, group_leader)) { |
1114 | err = -EPERM; | 1114 | err = -EPERM; |
1115 | if (task_session(p) != task_session(group_leader)) | 1115 | if (task_session(p) != task_session(group_leader)) |
1116 | goto out; | 1116 | goto out; |
1117 | err = -EACCES; | 1117 | err = -EACCES; |
1118 | if (p->did_exec) | 1118 | if (p->did_exec) |
1119 | goto out; | 1119 | goto out; |
1120 | } else { | 1120 | } else { |
1121 | err = -ESRCH; | 1121 | err = -ESRCH; |
1122 | if (p != group_leader) | 1122 | if (p != group_leader) |
1123 | goto out; | 1123 | goto out; |
1124 | } | 1124 | } |
1125 | 1125 | ||
1126 | err = -EPERM; | 1126 | err = -EPERM; |
1127 | if (p->signal->leader) | 1127 | if (p->signal->leader) |
1128 | goto out; | 1128 | goto out; |
1129 | 1129 | ||
1130 | pgrp = task_pid(p); | 1130 | pgrp = task_pid(p); |
1131 | if (pgid != pid) { | 1131 | if (pgid != pid) { |
1132 | struct task_struct *g; | 1132 | struct task_struct *g; |
1133 | 1133 | ||
1134 | pgrp = find_vpid(pgid); | 1134 | pgrp = find_vpid(pgid); |
1135 | g = pid_task(pgrp, PIDTYPE_PGID); | 1135 | g = pid_task(pgrp, PIDTYPE_PGID); |
1136 | if (!g || task_session(g) != task_session(group_leader)) | 1136 | if (!g || task_session(g) != task_session(group_leader)) |
1137 | goto out; | 1137 | goto out; |
1138 | } | 1138 | } |
1139 | 1139 | ||
1140 | err = security_task_setpgid(p, pgid); | 1140 | err = security_task_setpgid(p, pgid); |
1141 | if (err) | 1141 | if (err) |
1142 | goto out; | 1142 | goto out; |
1143 | 1143 | ||
1144 | if (task_pgrp(p) != pgrp) | 1144 | if (task_pgrp(p) != pgrp) |
1145 | change_pid(p, PIDTYPE_PGID, pgrp); | 1145 | change_pid(p, PIDTYPE_PGID, pgrp); |
1146 | 1146 | ||
1147 | err = 0; | 1147 | err = 0; |
1148 | out: | 1148 | out: |
1149 | /* All paths lead to here, thus we are safe. -DaveM */ | 1149 | /* All paths lead to here, thus we are safe. -DaveM */ |
1150 | write_unlock_irq(&tasklist_lock); | 1150 | write_unlock_irq(&tasklist_lock); |
1151 | rcu_read_unlock(); | 1151 | rcu_read_unlock(); |
1152 | return err; | 1152 | return err; |
1153 | } | 1153 | } |
1154 | 1154 | ||
1155 | SYSCALL_DEFINE1(getpgid, pid_t, pid) | 1155 | SYSCALL_DEFINE1(getpgid, pid_t, pid) |
1156 | { | 1156 | { |
1157 | struct task_struct *p; | 1157 | struct task_struct *p; |
1158 | struct pid *grp; | 1158 | struct pid *grp; |
1159 | int retval; | 1159 | int retval; |
1160 | 1160 | ||
1161 | rcu_read_lock(); | 1161 | rcu_read_lock(); |
1162 | if (!pid) | 1162 | if (!pid) |
1163 | grp = task_pgrp(current); | 1163 | grp = task_pgrp(current); |
1164 | else { | 1164 | else { |
1165 | retval = -ESRCH; | 1165 | retval = -ESRCH; |
1166 | p = find_task_by_vpid(pid); | 1166 | p = find_task_by_vpid(pid); |
1167 | if (!p) | 1167 | if (!p) |
1168 | goto out; | 1168 | goto out; |
1169 | grp = task_pgrp(p); | 1169 | grp = task_pgrp(p); |
1170 | if (!grp) | 1170 | if (!grp) |
1171 | goto out; | 1171 | goto out; |
1172 | 1172 | ||
1173 | retval = security_task_getpgid(p); | 1173 | retval = security_task_getpgid(p); |
1174 | if (retval) | 1174 | if (retval) |
1175 | goto out; | 1175 | goto out; |
1176 | } | 1176 | } |
1177 | retval = pid_vnr(grp); | 1177 | retval = pid_vnr(grp); |
1178 | out: | 1178 | out: |
1179 | rcu_read_unlock(); | 1179 | rcu_read_unlock(); |
1180 | return retval; | 1180 | return retval; |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | #ifdef __ARCH_WANT_SYS_GETPGRP | 1183 | #ifdef __ARCH_WANT_SYS_GETPGRP |
1184 | 1184 | ||
1185 | SYSCALL_DEFINE0(getpgrp) | 1185 | SYSCALL_DEFINE0(getpgrp) |
1186 | { | 1186 | { |
1187 | return sys_getpgid(0); | 1187 | return sys_getpgid(0); |
1188 | } | 1188 | } |
1189 | 1189 | ||
1190 | #endif | 1190 | #endif |
1191 | 1191 | ||
1192 | SYSCALL_DEFINE1(getsid, pid_t, pid) | 1192 | SYSCALL_DEFINE1(getsid, pid_t, pid) |
1193 | { | 1193 | { |
1194 | struct task_struct *p; | 1194 | struct task_struct *p; |
1195 | struct pid *sid; | 1195 | struct pid *sid; |
1196 | int retval; | 1196 | int retval; |
1197 | 1197 | ||
1198 | rcu_read_lock(); | 1198 | rcu_read_lock(); |
1199 | if (!pid) | 1199 | if (!pid) |
1200 | sid = task_session(current); | 1200 | sid = task_session(current); |
1201 | else { | 1201 | else { |
1202 | retval = -ESRCH; | 1202 | retval = -ESRCH; |
1203 | p = find_task_by_vpid(pid); | 1203 | p = find_task_by_vpid(pid); |
1204 | if (!p) | 1204 | if (!p) |
1205 | goto out; | 1205 | goto out; |
1206 | sid = task_session(p); | 1206 | sid = task_session(p); |
1207 | if (!sid) | 1207 | if (!sid) |
1208 | goto out; | 1208 | goto out; |
1209 | 1209 | ||
1210 | retval = security_task_getsid(p); | 1210 | retval = security_task_getsid(p); |
1211 | if (retval) | 1211 | if (retval) |
1212 | goto out; | 1212 | goto out; |
1213 | } | 1213 | } |
1214 | retval = pid_vnr(sid); | 1214 | retval = pid_vnr(sid); |
1215 | out: | 1215 | out: |
1216 | rcu_read_unlock(); | 1216 | rcu_read_unlock(); |
1217 | return retval; | 1217 | return retval; |
1218 | } | 1218 | } |
1219 | 1219 | ||
1220 | SYSCALL_DEFINE0(setsid) | 1220 | SYSCALL_DEFINE0(setsid) |
1221 | { | 1221 | { |
1222 | struct task_struct *group_leader = current->group_leader; | 1222 | struct task_struct *group_leader = current->group_leader; |
1223 | struct pid *sid = task_pid(group_leader); | 1223 | struct pid *sid = task_pid(group_leader); |
1224 | pid_t session = pid_vnr(sid); | 1224 | pid_t session = pid_vnr(sid); |
1225 | int err = -EPERM; | 1225 | int err = -EPERM; |
1226 | 1226 | ||
1227 | write_lock_irq(&tasklist_lock); | 1227 | write_lock_irq(&tasklist_lock); |
1228 | /* Fail if I am already a session leader */ | 1228 | /* Fail if I am already a session leader */ |
1229 | if (group_leader->signal->leader) | 1229 | if (group_leader->signal->leader) |
1230 | goto out; | 1230 | goto out; |
1231 | 1231 | ||
1232 | /* Fail if a process group id already exists that equals the | 1232 | /* Fail if a process group id already exists that equals the |
1233 | * proposed session id. | 1233 | * proposed session id. |
1234 | */ | 1234 | */ |
1235 | if (pid_task(sid, PIDTYPE_PGID)) | 1235 | if (pid_task(sid, PIDTYPE_PGID)) |
1236 | goto out; | 1236 | goto out; |
1237 | 1237 | ||
1238 | group_leader->signal->leader = 1; | 1238 | group_leader->signal->leader = 1; |
1239 | __set_special_pids(sid); | 1239 | __set_special_pids(sid); |
1240 | 1240 | ||
1241 | proc_clear_tty(group_leader); | 1241 | proc_clear_tty(group_leader); |
1242 | 1242 | ||
1243 | err = session; | 1243 | err = session; |
1244 | out: | 1244 | out: |
1245 | write_unlock_irq(&tasklist_lock); | 1245 | write_unlock_irq(&tasklist_lock); |
1246 | if (err > 0) { | 1246 | if (err > 0) { |
1247 | proc_sid_connector(group_leader); | 1247 | proc_sid_connector(group_leader); |
1248 | sched_autogroup_create_attach(group_leader); | 1248 | sched_autogroup_create_attach(group_leader); |
1249 | } | 1249 | } |
1250 | return err; | 1250 | return err; |
1251 | } | 1251 | } |
1252 | 1252 | ||
1253 | DECLARE_RWSEM(uts_sem); | 1253 | DECLARE_RWSEM(uts_sem); |
1254 | 1254 | ||
1255 | #ifdef COMPAT_UTS_MACHINE | 1255 | #ifdef COMPAT_UTS_MACHINE |
1256 | #define override_architecture(name) \ | 1256 | #define override_architecture(name) \ |
1257 | (personality(current->personality) == PER_LINUX32 && \ | 1257 | (personality(current->personality) == PER_LINUX32 && \ |
1258 | copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ | 1258 | copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ |
1259 | sizeof(COMPAT_UTS_MACHINE))) | 1259 | sizeof(COMPAT_UTS_MACHINE))) |
1260 | #else | 1260 | #else |
1261 | #define override_architecture(name) 0 | 1261 | #define override_architecture(name) 0 |
1262 | #endif | 1262 | #endif |
1263 | 1263 | ||
1264 | /* | 1264 | /* |
1265 | * Work around broken programs that cannot handle "Linux 3.0". | 1265 | * Work around broken programs that cannot handle "Linux 3.0". |
1266 | * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 | 1266 | * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 |
1267 | */ | 1267 | */ |
1268 | static int override_release(char __user *release, size_t len) | 1268 | static int override_release(char __user *release, size_t len) |
1269 | { | 1269 | { |
1270 | int ret = 0; | 1270 | int ret = 0; |
1271 | 1271 | ||
1272 | if (current->personality & UNAME26) { | 1272 | if (current->personality & UNAME26) { |
1273 | const char *rest = UTS_RELEASE; | 1273 | const char *rest = UTS_RELEASE; |
1274 | char buf[65] = { 0 }; | 1274 | char buf[65] = { 0 }; |
1275 | int ndots = 0; | 1275 | int ndots = 0; |
1276 | unsigned v; | 1276 | unsigned v; |
1277 | size_t copy; | 1277 | size_t copy; |
1278 | 1278 | ||
1279 | while (*rest) { | 1279 | while (*rest) { |
1280 | if (*rest == '.' && ++ndots >= 3) | 1280 | if (*rest == '.' && ++ndots >= 3) |
1281 | break; | 1281 | break; |
1282 | if (!isdigit(*rest) && *rest != '.') | 1282 | if (!isdigit(*rest) && *rest != '.') |
1283 | break; | 1283 | break; |
1284 | rest++; | 1284 | rest++; |
1285 | } | 1285 | } |
1286 | v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; | 1286 | v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; |
1287 | copy = clamp_t(size_t, len, 1, sizeof(buf)); | 1287 | copy = clamp_t(size_t, len, 1, sizeof(buf)); |
1288 | copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); | 1288 | copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); |
1289 | ret = copy_to_user(release, buf, copy + 1); | 1289 | ret = copy_to_user(release, buf, copy + 1); |
1290 | } | 1290 | } |
1291 | return ret; | 1291 | return ret; |
1292 | } | 1292 | } |
1293 | 1293 | ||
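override_release() implements the 3.x -> 2.6.40+x mapping described in the comment above it, and only fires for tasks running under the UNAME26 personality. A hedged userspace sketch (UNAME26 is 0x0020000 and may need <linux/personality.h> if the libc headers do not expose it):

    #include <stdio.h>
    #include <sys/personality.h>
    #include <sys/utsname.h>

    int main(void)
    {
            struct utsname u;

            personality(PER_LINUX | UNAME26);   /* opt in to the 2.6.40+x view */
            uname(&u);
            printf("%s\n", u.release);          /* e.g. "2.6.47" on a 3.7 kernel */
            return 0;
    }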
1294 | SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) | 1294 | SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) |
1295 | { | 1295 | { |
1296 | int errno = 0; | 1296 | int errno = 0; |
1297 | 1297 | ||
1298 | down_read(&uts_sem); | 1298 | down_read(&uts_sem); |
1299 | if (copy_to_user(name, utsname(), sizeof *name)) | 1299 | if (copy_to_user(name, utsname(), sizeof *name)) |
1300 | errno = -EFAULT; | 1300 | errno = -EFAULT; |
1301 | up_read(&uts_sem); | 1301 | up_read(&uts_sem); |
1302 | 1302 | ||
1303 | if (!errno && override_release(name->release, sizeof(name->release))) | 1303 | if (!errno && override_release(name->release, sizeof(name->release))) |
1304 | errno = -EFAULT; | 1304 | errno = -EFAULT; |
1305 | if (!errno && override_architecture(name)) | 1305 | if (!errno && override_architecture(name)) |
1306 | errno = -EFAULT; | 1306 | errno = -EFAULT; |
1307 | return errno; | 1307 | return errno; |
1308 | } | 1308 | } |
1309 | 1309 | ||
1310 | #ifdef __ARCH_WANT_SYS_OLD_UNAME | 1310 | #ifdef __ARCH_WANT_SYS_OLD_UNAME |
1311 | /* | 1311 | /* |
1312 | * Old cruft | 1312 | * Old cruft |
1313 | */ | 1313 | */ |
1314 | SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) | 1314 | SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) |
1315 | { | 1315 | { |
1316 | int error = 0; | 1316 | int error = 0; |
1317 | 1317 | ||
1318 | if (!name) | 1318 | if (!name) |
1319 | return -EFAULT; | 1319 | return -EFAULT; |
1320 | 1320 | ||
1321 | down_read(&uts_sem); | 1321 | down_read(&uts_sem); |
1322 | if (copy_to_user(name, utsname(), sizeof(*name))) | 1322 | if (copy_to_user(name, utsname(), sizeof(*name))) |
1323 | error = -EFAULT; | 1323 | error = -EFAULT; |
1324 | up_read(&uts_sem); | 1324 | up_read(&uts_sem); |
1325 | 1325 | ||
1326 | if (!error && override_release(name->release, sizeof(name->release))) | 1326 | if (!error && override_release(name->release, sizeof(name->release))) |
1327 | error = -EFAULT; | 1327 | error = -EFAULT; |
1328 | if (!error && override_architecture(name)) | 1328 | if (!error && override_architecture(name)) |
1329 | error = -EFAULT; | 1329 | error = -EFAULT; |
1330 | return error; | 1330 | return error; |
1331 | } | 1331 | } |
1332 | 1332 | ||
1333 | SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) | 1333 | SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) |
1334 | { | 1334 | { |
1335 | int error; | 1335 | int error; |
1336 | 1336 | ||
1337 | if (!name) | 1337 | if (!name) |
1338 | return -EFAULT; | 1338 | return -EFAULT; |
1339 | if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) | 1339 | if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) |
1340 | return -EFAULT; | 1340 | return -EFAULT; |
1341 | 1341 | ||
1342 | down_read(&uts_sem); | 1342 | down_read(&uts_sem); |
1343 | error = __copy_to_user(&name->sysname, &utsname()->sysname, | 1343 | error = __copy_to_user(&name->sysname, &utsname()->sysname, |
1344 | __OLD_UTS_LEN); | 1344 | __OLD_UTS_LEN); |
1345 | error |= __put_user(0, name->sysname + __OLD_UTS_LEN); | 1345 | error |= __put_user(0, name->sysname + __OLD_UTS_LEN); |
1346 | error |= __copy_to_user(&name->nodename, &utsname()->nodename, | 1346 | error |= __copy_to_user(&name->nodename, &utsname()->nodename, |
1347 | __OLD_UTS_LEN); | 1347 | __OLD_UTS_LEN); |
1348 | error |= __put_user(0, name->nodename + __OLD_UTS_LEN); | 1348 | error |= __put_user(0, name->nodename + __OLD_UTS_LEN); |
1349 | error |= __copy_to_user(&name->release, &utsname()->release, | 1349 | error |= __copy_to_user(&name->release, &utsname()->release, |
1350 | __OLD_UTS_LEN); | 1350 | __OLD_UTS_LEN); |
1351 | error |= __put_user(0, name->release + __OLD_UTS_LEN); | 1351 | error |= __put_user(0, name->release + __OLD_UTS_LEN); |
1352 | error |= __copy_to_user(&name->version, &utsname()->version, | 1352 | error |= __copy_to_user(&name->version, &utsname()->version, |
1353 | __OLD_UTS_LEN); | 1353 | __OLD_UTS_LEN); |
1354 | error |= __put_user(0, name->version + __OLD_UTS_LEN); | 1354 | error |= __put_user(0, name->version + __OLD_UTS_LEN); |
1355 | error |= __copy_to_user(&name->machine, &utsname()->machine, | 1355 | error |= __copy_to_user(&name->machine, &utsname()->machine, |
1356 | __OLD_UTS_LEN); | 1356 | __OLD_UTS_LEN); |
1357 | error |= __put_user(0, name->machine + __OLD_UTS_LEN); | 1357 | error |= __put_user(0, name->machine + __OLD_UTS_LEN); |
1358 | up_read(&uts_sem); | 1358 | up_read(&uts_sem); |
1359 | 1359 | ||
1360 | if (!error && override_architecture(name)) | 1360 | if (!error && override_architecture(name)) |
1361 | error = -EFAULT; | 1361 | error = -EFAULT; |
1362 | if (!error && override_release(name->release, sizeof(name->release))) | 1362 | if (!error && override_release(name->release, sizeof(name->release))) |
1363 | error = -EFAULT; | 1363 | error = -EFAULT; |
1364 | return error ? -EFAULT : 0; | 1364 | return error ? -EFAULT : 0; |
1365 | } | 1365 | } |
1366 | #endif | 1366 | #endif |
1367 | 1367 | ||
1368 | SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) | 1368 | SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) |
1369 | { | 1369 | { |
1370 | int errno; | 1370 | int errno; |
1371 | char tmp[__NEW_UTS_LEN]; | 1371 | char tmp[__NEW_UTS_LEN]; |
1372 | 1372 | ||
1373 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) | 1373 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1374 | return -EPERM; | 1374 | return -EPERM; |
1375 | 1375 | ||
1376 | if (len < 0 || len > __NEW_UTS_LEN) | 1376 | if (len < 0 || len > __NEW_UTS_LEN) |
1377 | return -EINVAL; | 1377 | return -EINVAL; |
1378 | down_write(&uts_sem); | 1378 | down_write(&uts_sem); |
1379 | errno = -EFAULT; | 1379 | errno = -EFAULT; |
1380 | if (!copy_from_user(tmp, name, len)) { | 1380 | if (!copy_from_user(tmp, name, len)) { |
1381 | struct new_utsname *u = utsname(); | 1381 | struct new_utsname *u = utsname(); |
1382 | 1382 | ||
1383 | memcpy(u->nodename, tmp, len); | 1383 | memcpy(u->nodename, tmp, len); |
1384 | memset(u->nodename + len, 0, sizeof(u->nodename) - len); | 1384 | memset(u->nodename + len, 0, sizeof(u->nodename) - len); |
1385 | errno = 0; | 1385 | errno = 0; |
1386 | uts_proc_notify(UTS_PROC_HOSTNAME); | 1386 | uts_proc_notify(UTS_PROC_HOSTNAME); |
1387 | } | 1387 | } |
1388 | up_write(&uts_sem); | 1388 | up_write(&uts_sem); |
1389 | return errno; | 1389 | return errno; |
1390 | } | 1390 | } |
1391 | 1391 | ||
1392 | #ifdef __ARCH_WANT_SYS_GETHOSTNAME | 1392 | #ifdef __ARCH_WANT_SYS_GETHOSTNAME |
1393 | 1393 | ||
1394 | SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) | 1394 | SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) |
1395 | { | 1395 | { |
1396 | int i, errno; | 1396 | int i, errno; |
1397 | struct new_utsname *u; | 1397 | struct new_utsname *u; |
1398 | 1398 | ||
1399 | if (len < 0) | 1399 | if (len < 0) |
1400 | return -EINVAL; | 1400 | return -EINVAL; |
1401 | down_read(&uts_sem); | 1401 | down_read(&uts_sem); |
1402 | u = utsname(); | 1402 | u = utsname(); |
1403 | i = 1 + strlen(u->nodename); | 1403 | i = 1 + strlen(u->nodename); |
1404 | if (i > len) | 1404 | if (i > len) |
1405 | i = len; | 1405 | i = len; |
1406 | errno = 0; | 1406 | errno = 0; |
1407 | if (copy_to_user(name, u->nodename, i)) | 1407 | if (copy_to_user(name, u->nodename, i)) |
1408 | errno = -EFAULT; | 1408 | errno = -EFAULT; |
1409 | up_read(&uts_sem); | 1409 | up_read(&uts_sem); |
1410 | return errno; | 1410 | return errno; |
1411 | } | 1411 | } |
1412 | 1412 | ||
1413 | #endif | 1413 | #endif |
1414 | 1414 | ||
1415 | /* | 1415 | /* |
1416 | * Only setdomainname; getdomainname can be implemented by calling | 1416 | * Only setdomainname; getdomainname can be implemented by calling |
1417 | * uname() | 1417 | * uname() |
1418 | */ | 1418 | */ |
1419 | SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) | 1419 | SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) |
1420 | { | 1420 | { |
1421 | int errno; | 1421 | int errno; |
1422 | char tmp[__NEW_UTS_LEN]; | 1422 | char tmp[__NEW_UTS_LEN]; |
1423 | 1423 | ||
1424 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) | 1424 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1425 | return -EPERM; | 1425 | return -EPERM; |
1426 | if (len < 0 || len > __NEW_UTS_LEN) | 1426 | if (len < 0 || len > __NEW_UTS_LEN) |
1427 | return -EINVAL; | 1427 | return -EINVAL; |
1428 | 1428 | ||
1429 | down_write(&uts_sem); | 1429 | down_write(&uts_sem); |
1430 | errno = -EFAULT; | 1430 | errno = -EFAULT; |
1431 | if (!copy_from_user(tmp, name, len)) { | 1431 | if (!copy_from_user(tmp, name, len)) { |
1432 | struct new_utsname *u = utsname(); | 1432 | struct new_utsname *u = utsname(); |
1433 | 1433 | ||
1434 | memcpy(u->domainname, tmp, len); | 1434 | memcpy(u->domainname, tmp, len); |
1435 | memset(u->domainname + len, 0, sizeof(u->domainname) - len); | 1435 | memset(u->domainname + len, 0, sizeof(u->domainname) - len); |
1436 | errno = 0; | 1436 | errno = 0; |
1437 | uts_proc_notify(UTS_PROC_DOMAINNAME); | 1437 | uts_proc_notify(UTS_PROC_DOMAINNAME); |
1438 | } | 1438 | } |
1439 | up_write(&uts_sem); | 1439 | up_write(&uts_sem); |
1440 | return errno; | 1440 | return errno; |
1441 | } | 1441 | } |
1442 | 1442 | ||
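The comment above setdomainname() points out that the getter side can be had from uname(). A hedged sketch using the GNU domainname member of struct utsname (exposed with _GNU_SOURCE):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/utsname.h>

    int main(void)
    {
            struct utsname u;

            if (uname(&u) == 0)
                    printf("%s\n", u.domainname);   /* NIS/YP domain name */
            return 0;
    }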
1443 | SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) | 1443 | SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) |
1444 | { | 1444 | { |
1445 | struct rlimit value; | 1445 | struct rlimit value; |
1446 | int ret; | 1446 | int ret; |
1447 | 1447 | ||
1448 | ret = do_prlimit(current, resource, NULL, &value); | 1448 | ret = do_prlimit(current, resource, NULL, &value); |
1449 | if (!ret) | 1449 | if (!ret) |
1450 | ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; | 1450 | ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; |
1451 | 1451 | ||
1452 | return ret; | 1452 | return ret; |
1453 | } | 1453 | } |
1454 | 1454 | ||
1455 | #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT | 1455 | #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT |
1456 | 1456 | ||
1457 | /* | 1457 | /* |
1458 | * Back compatibility for getrlimit. Needed for some apps. | 1458 | * Back compatibility for getrlimit. Needed for some apps. |
1459 | */ | 1459 | */ |
1460 | 1460 | ||
1461 | SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, | 1461 | SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, |
1462 | struct rlimit __user *, rlim) | 1462 | struct rlimit __user *, rlim) |
1463 | { | 1463 | { |
1464 | struct rlimit x; | 1464 | struct rlimit x; |
1465 | if (resource >= RLIM_NLIMITS) | 1465 | if (resource >= RLIM_NLIMITS) |
1466 | return -EINVAL; | 1466 | return -EINVAL; |
1467 | 1467 | ||
1468 | task_lock(current->group_leader); | 1468 | task_lock(current->group_leader); |
1469 | x = current->signal->rlim[resource]; | 1469 | x = current->signal->rlim[resource]; |
1470 | task_unlock(current->group_leader); | 1470 | task_unlock(current->group_leader); |
1471 | if (x.rlim_cur > 0x7FFFFFFF) | 1471 | if (x.rlim_cur > 0x7FFFFFFF) |
1472 | x.rlim_cur = 0x7FFFFFFF; | 1472 | x.rlim_cur = 0x7FFFFFFF; |
1473 | if (x.rlim_max > 0x7FFFFFFF) | 1473 | if (x.rlim_max > 0x7FFFFFFF) |
1474 | x.rlim_max = 0x7FFFFFFF; | 1474 | x.rlim_max = 0x7FFFFFFF; |
1475 | return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; | 1475 | return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; |
1476 | } | 1476 | } |
1477 | 1477 | ||
1478 | #endif | 1478 | #endif |
1479 | 1479 | ||
1480 | static inline bool rlim64_is_infinity(__u64 rlim64) | 1480 | static inline bool rlim64_is_infinity(__u64 rlim64) |
1481 | { | 1481 | { |
1482 | #if BITS_PER_LONG < 64 | 1482 | #if BITS_PER_LONG < 64 |
1483 | return rlim64 >= ULONG_MAX; | 1483 | return rlim64 >= ULONG_MAX; |
1484 | #else | 1484 | #else |
1485 | return rlim64 == RLIM64_INFINITY; | 1485 | return rlim64 == RLIM64_INFINITY; |
1486 | #endif | 1486 | #endif |
1487 | } | 1487 | } |
1488 | 1488 | ||
1489 | static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) | 1489 | static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) |
1490 | { | 1490 | { |
1491 | if (rlim->rlim_cur == RLIM_INFINITY) | 1491 | if (rlim->rlim_cur == RLIM_INFINITY) |
1492 | rlim64->rlim_cur = RLIM64_INFINITY; | 1492 | rlim64->rlim_cur = RLIM64_INFINITY; |
1493 | else | 1493 | else |
1494 | rlim64->rlim_cur = rlim->rlim_cur; | 1494 | rlim64->rlim_cur = rlim->rlim_cur; |
1495 | if (rlim->rlim_max == RLIM_INFINITY) | 1495 | if (rlim->rlim_max == RLIM_INFINITY) |
1496 | rlim64->rlim_max = RLIM64_INFINITY; | 1496 | rlim64->rlim_max = RLIM64_INFINITY; |
1497 | else | 1497 | else |
1498 | rlim64->rlim_max = rlim->rlim_max; | 1498 | rlim64->rlim_max = rlim->rlim_max; |
1499 | } | 1499 | } |
1500 | 1500 | ||
1501 | static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) | 1501 | static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) |
1502 | { | 1502 | { |
1503 | if (rlim64_is_infinity(rlim64->rlim_cur)) | 1503 | if (rlim64_is_infinity(rlim64->rlim_cur)) |
1504 | rlim->rlim_cur = RLIM_INFINITY; | 1504 | rlim->rlim_cur = RLIM_INFINITY; |
1505 | else | 1505 | else |
1506 | rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; | 1506 | rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; |
1507 | if (rlim64_is_infinity(rlim64->rlim_max)) | 1507 | if (rlim64_is_infinity(rlim64->rlim_max)) |
1508 | rlim->rlim_max = RLIM_INFINITY; | 1508 | rlim->rlim_max = RLIM_INFINITY; |
1509 | else | 1509 | else |
1510 | rlim->rlim_max = (unsigned long)rlim64->rlim_max; | 1510 | rlim->rlim_max = (unsigned long)rlim64->rlim_max; |
1511 | } | 1511 | } |
1512 | 1512 | ||
1513 | /* make sure you are allowed to change @tsk limits before calling this */ | 1513 | /* make sure you are allowed to change @tsk limits before calling this */ |
1514 | int do_prlimit(struct task_struct *tsk, unsigned int resource, | 1514 | int do_prlimit(struct task_struct *tsk, unsigned int resource, |
1515 | struct rlimit *new_rlim, struct rlimit *old_rlim) | 1515 | struct rlimit *new_rlim, struct rlimit *old_rlim) |
1516 | { | 1516 | { |
1517 | struct rlimit *rlim; | 1517 | struct rlimit *rlim; |
1518 | int retval = 0; | 1518 | int retval = 0; |
1519 | 1519 | ||
1520 | if (resource >= RLIM_NLIMITS) | 1520 | if (resource >= RLIM_NLIMITS) |
1521 | return -EINVAL; | 1521 | return -EINVAL; |
1522 | if (new_rlim) { | 1522 | if (new_rlim) { |
1523 | if (new_rlim->rlim_cur > new_rlim->rlim_max) | 1523 | if (new_rlim->rlim_cur > new_rlim->rlim_max) |
1524 | return -EINVAL; | 1524 | return -EINVAL; |
1525 | if (resource == RLIMIT_NOFILE && | 1525 | if (resource == RLIMIT_NOFILE && |
1526 | new_rlim->rlim_max > sysctl_nr_open) | 1526 | new_rlim->rlim_max > sysctl_nr_open) |
1527 | return -EPERM; | 1527 | return -EPERM; |
1528 | } | 1528 | } |
1529 | 1529 | ||
1530 | /* protect tsk->signal and tsk->sighand from disappearing */ | 1530 | /* protect tsk->signal and tsk->sighand from disappearing */ |
1531 | read_lock(&tasklist_lock); | 1531 | read_lock(&tasklist_lock); |
1532 | if (!tsk->sighand) { | 1532 | if (!tsk->sighand) { |
1533 | retval = -ESRCH; | 1533 | retval = -ESRCH; |
1534 | goto out; | 1534 | goto out; |
1535 | } | 1535 | } |
1536 | 1536 | ||
1537 | rlim = tsk->signal->rlim + resource; | 1537 | rlim = tsk->signal->rlim + resource; |
1538 | task_lock(tsk->group_leader); | 1538 | task_lock(tsk->group_leader); |
1539 | if (new_rlim) { | 1539 | if (new_rlim) { |
1540 | /* Keep the capable check against init_user_ns until | 1540 | /* Keep the capable check against init_user_ns until |
1541 | cgroups can contain all limits */ | 1541 | cgroups can contain all limits */ |
1542 | if (new_rlim->rlim_max > rlim->rlim_max && | 1542 | if (new_rlim->rlim_max > rlim->rlim_max && |
1543 | !capable(CAP_SYS_RESOURCE)) | 1543 | !capable(CAP_SYS_RESOURCE)) |
1544 | retval = -EPERM; | 1544 | retval = -EPERM; |
1545 | if (!retval) | 1545 | if (!retval) |
1546 | retval = security_task_setrlimit(tsk->group_leader, | 1546 | retval = security_task_setrlimit(tsk->group_leader, |
1547 | resource, new_rlim); | 1547 | resource, new_rlim); |
1548 | if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { | 1548 | if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { |
1549 | /* | 1549 | /* |
1550 | * The caller is asking for an immediate RLIMIT_CPU | 1550 | * The caller is asking for an immediate RLIMIT_CPU |
1551 | * expiry. But we use the zero value to mean "it was | 1551 | * expiry. But we use the zero value to mean "it was |
1552 | * never set". So let's cheat and make it one second | 1552 | * never set". So let's cheat and make it one second |
1553 | * instead | 1553 | * instead |
1554 | */ | 1554 | */ |
1555 | new_rlim->rlim_cur = 1; | 1555 | new_rlim->rlim_cur = 1; |
1556 | } | 1556 | } |
1557 | } | 1557 | } |
1558 | if (!retval) { | 1558 | if (!retval) { |
1559 | if (old_rlim) | 1559 | if (old_rlim) |
1560 | *old_rlim = *rlim; | 1560 | *old_rlim = *rlim; |
1561 | if (new_rlim) | 1561 | if (new_rlim) |
1562 | *rlim = *new_rlim; | 1562 | *rlim = *new_rlim; |
1563 | } | 1563 | } |
1564 | task_unlock(tsk->group_leader); | 1564 | task_unlock(tsk->group_leader); |
1565 | 1565 | ||
1566 | /* | 1566 | /* |
1567 | * RLIMIT_CPU handling. Note that the kernel fails to return an error | 1567 | * RLIMIT_CPU handling. Note that the kernel fails to return an error |
1568 | * code if it rejected the user's attempt to set RLIMIT_CPU. This is a | 1568 | * code if it rejected the user's attempt to set RLIMIT_CPU. This is a |
1569 | * very long-standing error, and fixing it now risks breakage of | 1569 | * very long-standing error, and fixing it now risks breakage of |
1570 | * applications, so we live with it | 1570 | * applications, so we live with it |
1571 | */ | 1571 | */ |
1572 | if (!retval && new_rlim && resource == RLIMIT_CPU && | 1572 | if (!retval && new_rlim && resource == RLIMIT_CPU && |
1573 | new_rlim->rlim_cur != RLIM_INFINITY) | 1573 | new_rlim->rlim_cur != RLIM_INFINITY) |
1574 | update_rlimit_cpu(tsk, new_rlim->rlim_cur); | 1574 | update_rlimit_cpu(tsk, new_rlim->rlim_cur); |
1575 | out: | 1575 | out: |
1576 | read_unlock(&tasklist_lock); | 1576 | read_unlock(&tasklist_lock); |
1577 | return retval; | 1577 | return retval; |
1578 | } | 1578 | } |
1579 | 1579 | ||
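do_prlimit() above is the common backend for setrlimit, getrlimit and prlimit64; the credential rule it depends on is check_prlimit_permission() just below. A hedged userspace sketch using the glibc prlimit() wrapper (the pid and limit values are made up for the example):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/types.h>

    int main(void)
    {
            pid_t pid = 1234;                       /* hypothetical target process */
            struct rlimit new_lim = { .rlim_cur = 4096, .rlim_max = 4096 };
            struct rlimit old_lim;

            if (prlimit(pid, RLIMIT_NOFILE, &new_lim, &old_lim) == -1) {
                    perror("prlimit");
                    return 1;
            }
            printf("old soft=%llu hard=%llu\n",
                   (unsigned long long)old_lim.rlim_cur,
                   (unsigned long long)old_lim.rlim_max);
            return 0;
    }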
1580 | /* rcu lock must be held */ | 1580 | /* rcu lock must be held */ |
1581 | static int check_prlimit_permission(struct task_struct *task) | 1581 | static int check_prlimit_permission(struct task_struct *task) |
1582 | { | 1582 | { |
1583 | const struct cred *cred = current_cred(), *tcred; | 1583 | const struct cred *cred = current_cred(), *tcred; |
1584 | 1584 | ||
1585 | if (current == task) | 1585 | if (current == task) |
1586 | return 0; | 1586 | return 0; |
1587 | 1587 | ||
1588 | tcred = __task_cred(task); | 1588 | tcred = __task_cred(task); |
1589 | if (uid_eq(cred->uid, tcred->euid) && | 1589 | if (uid_eq(cred->uid, tcred->euid) && |
1590 | uid_eq(cred->uid, tcred->suid) && | 1590 | uid_eq(cred->uid, tcred->suid) && |
1591 | uid_eq(cred->uid, tcred->uid) && | 1591 | uid_eq(cred->uid, tcred->uid) && |
1592 | gid_eq(cred->gid, tcred->egid) && | 1592 | gid_eq(cred->gid, tcred->egid) && |
1593 | gid_eq(cred->gid, tcred->sgid) && | 1593 | gid_eq(cred->gid, tcred->sgid) && |
1594 | gid_eq(cred->gid, tcred->gid)) | 1594 | gid_eq(cred->gid, tcred->gid)) |
1595 | return 0; | 1595 | return 0; |
1596 | if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) | 1596 | if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) |
1597 | return 0; | 1597 | return 0; |
1598 | 1598 | ||
1599 | return -EPERM; | 1599 | return -EPERM; |
1600 | } | 1600 | } |
1601 | 1601 | ||
1602 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, | 1602 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, |
1603 | const struct rlimit64 __user *, new_rlim, | 1603 | const struct rlimit64 __user *, new_rlim, |
1604 | struct rlimit64 __user *, old_rlim) | 1604 | struct rlimit64 __user *, old_rlim) |
1605 | { | 1605 | { |
1606 | struct rlimit64 old64, new64; | 1606 | struct rlimit64 old64, new64; |
1607 | struct rlimit old, new; | 1607 | struct rlimit old, new; |
1608 | struct task_struct *tsk; | 1608 | struct task_struct *tsk; |
1609 | int ret; | 1609 | int ret; |
1610 | 1610 | ||
1611 | if (new_rlim) { | 1611 | if (new_rlim) { |
1612 | if (copy_from_user(&new64, new_rlim, sizeof(new64))) | 1612 | if (copy_from_user(&new64, new_rlim, sizeof(new64))) |
1613 | return -EFAULT; | 1613 | return -EFAULT; |
1614 | rlim64_to_rlim(&new64, &new); | 1614 | rlim64_to_rlim(&new64, &new); |
1615 | } | 1615 | } |
1616 | 1616 | ||
1617 | rcu_read_lock(); | 1617 | rcu_read_lock(); |
1618 | tsk = pid ? find_task_by_vpid(pid) : current; | 1618 | tsk = pid ? find_task_by_vpid(pid) : current; |
1619 | if (!tsk) { | 1619 | if (!tsk) { |
1620 | rcu_read_unlock(); | 1620 | rcu_read_unlock(); |
1621 | return -ESRCH; | 1621 | return -ESRCH; |
1622 | } | 1622 | } |
1623 | ret = check_prlimit_permission(tsk); | 1623 | ret = check_prlimit_permission(tsk); |
1624 | if (ret) { | 1624 | if (ret) { |
1625 | rcu_read_unlock(); | 1625 | rcu_read_unlock(); |
1626 | return ret; | 1626 | return ret; |
1627 | } | 1627 | } |
1628 | get_task_struct(tsk); | 1628 | get_task_struct(tsk); |
1629 | rcu_read_unlock(); | 1629 | rcu_read_unlock(); |
1630 | 1630 | ||
1631 | ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, | 1631 | ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, |
1632 | old_rlim ? &old : NULL); | 1632 | old_rlim ? &old : NULL); |
1633 | 1633 | ||
1634 | if (!ret && old_rlim) { | 1634 | if (!ret && old_rlim) { |
1635 | rlim_to_rlim64(&old, &old64); | 1635 | rlim_to_rlim64(&old, &old64); |
1636 | if (copy_to_user(old_rlim, &old64, sizeof(old64))) | 1636 | if (copy_to_user(old_rlim, &old64, sizeof(old64))) |
1637 | ret = -EFAULT; | 1637 | ret = -EFAULT; |
1638 | } | 1638 | } |
1639 | 1639 | ||
1640 | put_task_struct(tsk); | 1640 | put_task_struct(tsk); |
1641 | return ret; | 1641 | return ret; |
1642 | } | 1642 | } |
1643 | 1643 | ||
1644 | SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) | 1644 | SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) |
1645 | { | 1645 | { |
1646 | struct rlimit new_rlim; | 1646 | struct rlimit new_rlim; |
1647 | 1647 | ||
1648 | if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) | 1648 | if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) |
1649 | return -EFAULT; | 1649 | return -EFAULT; |
1650 | return do_prlimit(current, resource, &new_rlim, NULL); | 1650 | return do_prlimit(current, resource, &new_rlim, NULL); |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | /* | 1653 | /* |
1654 | * It would make sense to put struct rusage in the task_struct, | 1654 | * It would make sense to put struct rusage in the task_struct, |
1655 | * except that would make the task_struct be *really big*. After | 1655 | * except that would make the task_struct be *really big*. After |
1656 | * task_struct gets moved into malloc'ed memory, it would | 1656 | * task_struct gets moved into malloc'ed memory, it would |
1657 | * make sense to do this. It will make moving the rest of the information | 1657 | * make sense to do this. It will make moving the rest of the information |
1658 | * a lot simpler! (Which we're not doing right now because we're not | 1658 | * a lot simpler! (Which we're not doing right now because we're not |
1659 | * measuring them yet). | 1659 | * measuring them yet). |
1660 | * | 1660 | * |
1661 | * When sampling multiple threads for RUSAGE_SELF, under SMP we might have | 1661 | * When sampling multiple threads for RUSAGE_SELF, under SMP we might have |
1662 | * races with threads incrementing their own counters. But since word | 1662 | * races with threads incrementing their own counters. But since word |
1663 | * reads are atomic, we either get new values or old values and we don't | 1663 | * reads are atomic, we either get new values or old values and we don't |
1664 | * care which for the sums. We always take the siglock to protect reading | 1664 | * care which for the sums. We always take the siglock to protect reading |
1665 | * the c* fields from p->signal from races with exit.c updating those | 1665 | * the c* fields from p->signal from races with exit.c updating those |
1666 | * fields when reaping, so a sample either gets all the additions of a | 1666 | * fields when reaping, so a sample either gets all the additions of a |
1667 | * given child after it's reaped, or none so this sample is before reaping. | 1667 | * given child after it's reaped, or none so this sample is before reaping. |
1668 | * | 1668 | * |
1669 | * Locking: | 1669 | * Locking: |
1670 | * We need to take the siglock for CHILDREN, SELF and BOTH | 1670 | * We need to take the siglock for CHILDREN, SELF and BOTH |
1671 | * for the cases current multithreaded, non-current single threaded | 1671 | * for the cases current multithreaded, non-current single threaded |
1672 | * non-current multithreaded. Thread traversal is now safe with | 1672 | * non-current multithreaded. Thread traversal is now safe with |
1673 | * the siglock held. | 1673 | * the siglock held. |
1674 | * Strictly speaking, we do not need to take the siglock if we are current and | 1674 | * Strictly speaking, we do not need to take the siglock if we are current and |
1675 | * single threaded, as no one else can take our signal_struct away, no one | 1675 | * single threaded, as no one else can take our signal_struct away, no one |
1676 | * else can reap the children to update signal->c* counters, and no one else | 1676 | * else can reap the children to update signal->c* counters, and no one else |
1677 | * can race with the signal-> fields. If we do not take any lock, the | 1677 | * can race with the signal-> fields. If we do not take any lock, the |
1678 | * signal-> fields could be read out of order while another thread was just | 1678 | * signal-> fields could be read out of order while another thread was just |
1679 | * exiting. So we should place a read memory barrier when we avoid the lock. | 1679 | * exiting. So we should place a read memory barrier when we avoid the lock. |
1680 | * On the writer side, write memory barrier is implied in __exit_signal | 1680 | * On the writer side, write memory barrier is implied in __exit_signal |
1681 | * as __exit_signal releases the siglock spinlock after updating the signal-> | 1681 | * as __exit_signal releases the siglock spinlock after updating the signal-> |
1682 | * fields. But we don't do this yet to keep things simple. | 1682 | * fields. But we don't do this yet to keep things simple. |
1683 | * | 1683 | * |
1684 | */ | 1684 | */ |
1685 | 1685 | ||
1686 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) | 1686 | static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) |
1687 | { | 1687 | { |
1688 | r->ru_nvcsw += t->nvcsw; | 1688 | r->ru_nvcsw += t->nvcsw; |
1689 | r->ru_nivcsw += t->nivcsw; | 1689 | r->ru_nivcsw += t->nivcsw; |
1690 | r->ru_minflt += t->min_flt; | 1690 | r->ru_minflt += t->min_flt; |
1691 | r->ru_majflt += t->maj_flt; | 1691 | r->ru_majflt += t->maj_flt; |
1692 | r->ru_inblock += task_io_get_inblock(t); | 1692 | r->ru_inblock += task_io_get_inblock(t); |
1693 | r->ru_oublock += task_io_get_oublock(t); | 1693 | r->ru_oublock += task_io_get_oublock(t); |
1694 | } | 1694 | } |
1695 | 1695 | ||
1696 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | 1696 | static void k_getrusage(struct task_struct *p, int who, struct rusage *r) |
1697 | { | 1697 | { |
1698 | struct task_struct *t; | 1698 | struct task_struct *t; |
1699 | unsigned long flags; | 1699 | unsigned long flags; |
1700 | cputime_t tgutime, tgstime, utime, stime; | 1700 | cputime_t tgutime, tgstime, utime, stime; |
1701 | unsigned long maxrss = 0; | 1701 | unsigned long maxrss = 0; |
1702 | 1702 | ||
1703 | memset((char *) r, 0, sizeof *r); | 1703 | memset((char *) r, 0, sizeof *r); |
1704 | utime = stime = 0; | 1704 | utime = stime = 0; |
1705 | 1705 | ||
1706 | if (who == RUSAGE_THREAD) { | 1706 | if (who == RUSAGE_THREAD) { |
1707 | task_times(current, &utime, &stime); | 1707 | task_cputime_adjusted(current, &utime, &stime); |
1708 | accumulate_thread_rusage(p, r); | 1708 | accumulate_thread_rusage(p, r); |
1709 | maxrss = p->signal->maxrss; | 1709 | maxrss = p->signal->maxrss; |
1710 | goto out; | 1710 | goto out; |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | if (!lock_task_sighand(p, &flags)) | 1713 | if (!lock_task_sighand(p, &flags)) |
1714 | return; | 1714 | return; |
1715 | 1715 | ||
1716 | switch (who) { | 1716 | switch (who) { |
1717 | case RUSAGE_BOTH: | 1717 | case RUSAGE_BOTH: |
1718 | case RUSAGE_CHILDREN: | 1718 | case RUSAGE_CHILDREN: |
1719 | utime = p->signal->cutime; | 1719 | utime = p->signal->cutime; |
1720 | stime = p->signal->cstime; | 1720 | stime = p->signal->cstime; |
1721 | r->ru_nvcsw = p->signal->cnvcsw; | 1721 | r->ru_nvcsw = p->signal->cnvcsw; |
1722 | r->ru_nivcsw = p->signal->cnivcsw; | 1722 | r->ru_nivcsw = p->signal->cnivcsw; |
1723 | r->ru_minflt = p->signal->cmin_flt; | 1723 | r->ru_minflt = p->signal->cmin_flt; |
1724 | r->ru_majflt = p->signal->cmaj_flt; | 1724 | r->ru_majflt = p->signal->cmaj_flt; |
1725 | r->ru_inblock = p->signal->cinblock; | 1725 | r->ru_inblock = p->signal->cinblock; |
1726 | r->ru_oublock = p->signal->coublock; | 1726 | r->ru_oublock = p->signal->coublock; |
1727 | maxrss = p->signal->cmaxrss; | 1727 | maxrss = p->signal->cmaxrss; |
1728 | 1728 | ||
1729 | if (who == RUSAGE_CHILDREN) | 1729 | if (who == RUSAGE_CHILDREN) |
1730 | break; | 1730 | break; |
1731 | 1731 | ||
1732 | case RUSAGE_SELF: | 1732 | case RUSAGE_SELF: |
1733 | thread_group_times(p, &tgutime, &tgstime); | 1733 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); |
1734 | utime += tgutime; | 1734 | utime += tgutime; |
1735 | stime += tgstime; | 1735 | stime += tgstime; |
1736 | r->ru_nvcsw += p->signal->nvcsw; | 1736 | r->ru_nvcsw += p->signal->nvcsw; |
1737 | r->ru_nivcsw += p->signal->nivcsw; | 1737 | r->ru_nivcsw += p->signal->nivcsw; |
1738 | r->ru_minflt += p->signal->min_flt; | 1738 | r->ru_minflt += p->signal->min_flt; |
1739 | r->ru_majflt += p->signal->maj_flt; | 1739 | r->ru_majflt += p->signal->maj_flt; |
1740 | r->ru_inblock += p->signal->inblock; | 1740 | r->ru_inblock += p->signal->inblock; |
1741 | r->ru_oublock += p->signal->oublock; | 1741 | r->ru_oublock += p->signal->oublock; |
1742 | if (maxrss < p->signal->maxrss) | 1742 | if (maxrss < p->signal->maxrss) |
1743 | maxrss = p->signal->maxrss; | 1743 | maxrss = p->signal->maxrss; |
1744 | t = p; | 1744 | t = p; |
1745 | do { | 1745 | do { |
1746 | accumulate_thread_rusage(t, r); | 1746 | accumulate_thread_rusage(t, r); |
1747 | t = next_thread(t); | 1747 | t = next_thread(t); |
1748 | } while (t != p); | 1748 | } while (t != p); |
1749 | break; | 1749 | break; |
1750 | 1750 | ||
1751 | default: | 1751 | default: |
1752 | BUG(); | 1752 | BUG(); |
1753 | } | 1753 | } |
1754 | unlock_task_sighand(p, &flags); | 1754 | unlock_task_sighand(p, &flags); |
1755 | 1755 | ||
1756 | out: | 1756 | out: |
1757 | cputime_to_timeval(utime, &r->ru_utime); | 1757 | cputime_to_timeval(utime, &r->ru_utime); |
1758 | cputime_to_timeval(stime, &r->ru_stime); | 1758 | cputime_to_timeval(stime, &r->ru_stime); |
1759 | 1759 | ||
1760 | if (who != RUSAGE_CHILDREN) { | 1760 | if (who != RUSAGE_CHILDREN) { |
1761 | struct mm_struct *mm = get_task_mm(p); | 1761 | struct mm_struct *mm = get_task_mm(p); |
1762 | if (mm) { | 1762 | if (mm) { |
1763 | setmax_mm_hiwater_rss(&maxrss, mm); | 1763 | setmax_mm_hiwater_rss(&maxrss, mm); |
1764 | mmput(mm); | 1764 | mmput(mm); |
1765 | } | 1765 | } |
1766 | } | 1766 | } |
1767 | r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ | 1767 | r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ |
1768 | } | 1768 | } |
1769 | 1769 | ||
1770 | int getrusage(struct task_struct *p, int who, struct rusage __user *ru) | 1770 | int getrusage(struct task_struct *p, int who, struct rusage __user *ru) |
1771 | { | 1771 | { |
1772 | struct rusage r; | 1772 | struct rusage r; |
1773 | k_getrusage(p, who, &r); | 1773 | k_getrusage(p, who, &r); |
1774 | return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; | 1774 | return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; |
1775 | } | 1775 | } |
1776 | 1776 | ||
1777 | SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) | 1777 | SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) |
1778 | { | 1778 | { |
1779 | if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && | 1779 | if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && |
1780 | who != RUSAGE_THREAD) | 1780 | who != RUSAGE_THREAD) |
1781 | return -EINVAL; | 1781 | return -EINVAL; |
1782 | return getrusage(current, who, ru); | 1782 | return getrusage(current, who, ru); |
1783 | } | 1783 | } |
1784 | 1784 | ||
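Because this commit only changes which cputime helpers k_getrusage() calls, the values reported to userspace keep the same meaning; a small sketch (standard getrusage() API, RUSAGE_THREAD is Linux-specific and needs _GNU_SOURCE) showing the three scopes handled above:

/* Sketch: RUSAGE_SELF sums all threads via thread_group_cputime_adjusted(),
 * RUSAGE_THREAD reports only the caller via task_cputime_adjusted(),
 * RUSAGE_CHILDREN reports reaped children from the signal->c* fields. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/resource.h>

static void show(const char *tag, int who)
{
	struct rusage r;

	if (getrusage(who, &r) == 0)
		printf("%-8s utime=%ld.%06lds stime=%ld.%06lds maxrss=%ldkB\n",
		       tag,
		       (long)r.ru_utime.tv_sec, (long)r.ru_utime.tv_usec,
		       (long)r.ru_stime.tv_sec, (long)r.ru_stime.tv_usec,
		       r.ru_maxrss);
}

int main(void)
{
	show("self", RUSAGE_SELF);
	show("thread", RUSAGE_THREAD);
	show("children", RUSAGE_CHILDREN);
	return 0;
}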
1785 | SYSCALL_DEFINE1(umask, int, mask) | 1785 | SYSCALL_DEFINE1(umask, int, mask) |
1786 | { | 1786 | { |
1787 | mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); | 1787 | mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); |
1788 | return mask; | 1788 | return mask; |
1789 | } | 1789 | } |
1790 | 1790 | ||
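Since the umask() implementation above simply swaps the new mask in with xchg() and returns the previous one, the only way to read the mask without changing it is the classic two-call idiom; a minimal sketch:

/* Sketch: read the current umask non-destructively. */
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	mode_t old = umask(0);	/* returns the previous mask, as the xchg() above does */

	umask(old);		/* restore it immediately */
	printf("umask = %04o\n", (unsigned)old);
	return 0;
}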
1791 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1791 | #ifdef CONFIG_CHECKPOINT_RESTORE |
1792 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | 1792 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) |
1793 | { | 1793 | { |
1794 | struct fd exe; | 1794 | struct fd exe; |
1795 | struct dentry *dentry; | 1795 | struct dentry *dentry; |
1796 | int err; | 1796 | int err; |
1797 | 1797 | ||
1798 | exe = fdget(fd); | 1798 | exe = fdget(fd); |
1799 | if (!exe.file) | 1799 | if (!exe.file) |
1800 | return -EBADF; | 1800 | return -EBADF; |
1801 | 1801 | ||
1802 | dentry = exe.file->f_path.dentry; | 1802 | dentry = exe.file->f_path.dentry; |
1803 | 1803 | ||
1804 | /* | 1804 | /* |
1805 | * Because the original mm->exe_file points to executable file, make | 1805 | * Because the original mm->exe_file points to executable file, make |
1806 | * sure that this one is executable as well, to avoid breaking an | 1806 | * sure that this one is executable as well, to avoid breaking an |
1807 | * overall picture. | 1807 | * overall picture. |
1808 | */ | 1808 | */ |
1809 | err = -EACCES; | 1809 | err = -EACCES; |
1810 | if (!S_ISREG(dentry->d_inode->i_mode) || | 1810 | if (!S_ISREG(dentry->d_inode->i_mode) || |
1811 | exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) | 1811 | exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) |
1812 | goto exit; | 1812 | goto exit; |
1813 | 1813 | ||
1814 | err = inode_permission(dentry->d_inode, MAY_EXEC); | 1814 | err = inode_permission(dentry->d_inode, MAY_EXEC); |
1815 | if (err) | 1815 | if (err) |
1816 | goto exit; | 1816 | goto exit; |
1817 | 1817 | ||
1818 | down_write(&mm->mmap_sem); | 1818 | down_write(&mm->mmap_sem); |
1819 | 1819 | ||
1820 | /* | 1820 | /* |
1821 | * Forbid mm->exe_file change if old file still mapped. | 1821 | * Forbid mm->exe_file change if old file still mapped. |
1822 | */ | 1822 | */ |
1823 | err = -EBUSY; | 1823 | err = -EBUSY; |
1824 | if (mm->exe_file) { | 1824 | if (mm->exe_file) { |
1825 | struct vm_area_struct *vma; | 1825 | struct vm_area_struct *vma; |
1826 | 1826 | ||
1827 | for (vma = mm->mmap; vma; vma = vma->vm_next) | 1827 | for (vma = mm->mmap; vma; vma = vma->vm_next) |
1828 | if (vma->vm_file && | 1828 | if (vma->vm_file && |
1829 | path_equal(&vma->vm_file->f_path, | 1829 | path_equal(&vma->vm_file->f_path, |
1830 | &mm->exe_file->f_path)) | 1830 | &mm->exe_file->f_path)) |
1831 | goto exit_unlock; | 1831 | goto exit_unlock; |
1832 | } | 1832 | } |
1833 | 1833 | ||
1834 | /* | 1834 | /* |
1835 | * The symlink can be changed only once, just to disallow arbitrary | 1835 | * The symlink can be changed only once, just to disallow arbitrary |
1836 | * transitions malicious software might bring in. This means one | 1836 | * transitions malicious software might bring in. This means one |
1837 | * could make a snapshot over all processes running and monitor | 1837 | * could make a snapshot over all processes running and monitor |
1838 | * /proc/pid/exe changes to notice unusual activity if needed. | 1838 | * /proc/pid/exe changes to notice unusual activity if needed. |
1839 | */ | 1839 | */ |
1840 | err = -EPERM; | 1840 | err = -EPERM; |
1841 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) | 1841 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) |
1842 | goto exit_unlock; | 1842 | goto exit_unlock; |
1843 | 1843 | ||
1844 | err = 0; | 1844 | err = 0; |
1845 | set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ | 1845 | set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ |
1846 | exit_unlock: | 1846 | exit_unlock: |
1847 | up_write(&mm->mmap_sem); | 1847 | up_write(&mm->mmap_sem); |
1848 | 1848 | ||
1849 | exit: | 1849 | exit: |
1850 | fdput(exe); | 1850 | fdput(exe); |
1851 | return err; | 1851 | return err; |
1852 | } | 1852 | } |
1853 | 1853 | ||
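prctl_set_mm_exe_file() is reached through prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, fd) and is aimed at checkpoint/restore tools such as CRIU. A hedged calling-side sketch, assuming CAP_SYS_RESOURCE, a CONFIG_CHECKPOINT_RESTORE kernel, and headers that define the PR_SET_MM constants; the opened path is only a placeholder:

/* Sketch: repoint /proc/self/exe at another regular, executable file.
 * In a normal process this typically fails with EBUSY because the
 * original executable is still mapped; a restorer only does this after
 * replacing those mappings. A second successful change returns EPERM,
 * since MMF_EXE_FILE_CHANGED allows the symlink to change only once. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/usr/bin/true", O_RDONLY);	/* placeholder target */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, fd, 0, 0) != 0)
		perror("prctl(PR_SET_MM_EXE_FILE)");
	close(fd);
	return 0;
}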
1854 | static int prctl_set_mm(int opt, unsigned long addr, | 1854 | static int prctl_set_mm(int opt, unsigned long addr, |
1855 | unsigned long arg4, unsigned long arg5) | 1855 | unsigned long arg4, unsigned long arg5) |
1856 | { | 1856 | { |
1857 | unsigned long rlim = rlimit(RLIMIT_DATA); | 1857 | unsigned long rlim = rlimit(RLIMIT_DATA); |
1858 | struct mm_struct *mm = current->mm; | 1858 | struct mm_struct *mm = current->mm; |
1859 | struct vm_area_struct *vma; | 1859 | struct vm_area_struct *vma; |
1860 | int error; | 1860 | int error; |
1861 | 1861 | ||
1862 | if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) | 1862 | if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) |
1863 | return -EINVAL; | 1863 | return -EINVAL; |
1864 | 1864 | ||
1865 | if (!capable(CAP_SYS_RESOURCE)) | 1865 | if (!capable(CAP_SYS_RESOURCE)) |
1866 | return -EPERM; | 1866 | return -EPERM; |
1867 | 1867 | ||
1868 | if (opt == PR_SET_MM_EXE_FILE) | 1868 | if (opt == PR_SET_MM_EXE_FILE) |
1869 | return prctl_set_mm_exe_file(mm, (unsigned int)addr); | 1869 | return prctl_set_mm_exe_file(mm, (unsigned int)addr); |
1870 | 1870 | ||
1871 | if (addr >= TASK_SIZE || addr < mmap_min_addr) | 1871 | if (addr >= TASK_SIZE || addr < mmap_min_addr) |
1872 | return -EINVAL; | 1872 | return -EINVAL; |
1873 | 1873 | ||
1874 | error = -EINVAL; | 1874 | error = -EINVAL; |
1875 | 1875 | ||
1876 | down_read(&mm->mmap_sem); | 1876 | down_read(&mm->mmap_sem); |
1877 | vma = find_vma(mm, addr); | 1877 | vma = find_vma(mm, addr); |
1878 | 1878 | ||
1879 | switch (opt) { | 1879 | switch (opt) { |
1880 | case PR_SET_MM_START_CODE: | 1880 | case PR_SET_MM_START_CODE: |
1881 | mm->start_code = addr; | 1881 | mm->start_code = addr; |
1882 | break; | 1882 | break; |
1883 | case PR_SET_MM_END_CODE: | 1883 | case PR_SET_MM_END_CODE: |
1884 | mm->end_code = addr; | 1884 | mm->end_code = addr; |
1885 | break; | 1885 | break; |
1886 | case PR_SET_MM_START_DATA: | 1886 | case PR_SET_MM_START_DATA: |
1887 | mm->start_data = addr; | 1887 | mm->start_data = addr; |
1888 | break; | 1888 | break; |
1889 | case PR_SET_MM_END_DATA: | 1889 | case PR_SET_MM_END_DATA: |
1890 | mm->end_data = addr; | 1890 | mm->end_data = addr; |
1891 | break; | 1891 | break; |
1892 | 1892 | ||
1893 | case PR_SET_MM_START_BRK: | 1893 | case PR_SET_MM_START_BRK: |
1894 | if (addr <= mm->end_data) | 1894 | if (addr <= mm->end_data) |
1895 | goto out; | 1895 | goto out; |
1896 | 1896 | ||
1897 | if (rlim < RLIM_INFINITY && | 1897 | if (rlim < RLIM_INFINITY && |
1898 | (mm->brk - addr) + | 1898 | (mm->brk - addr) + |
1899 | (mm->end_data - mm->start_data) > rlim) | 1899 | (mm->end_data - mm->start_data) > rlim) |
1900 | goto out; | 1900 | goto out; |
1901 | 1901 | ||
1902 | mm->start_brk = addr; | 1902 | mm->start_brk = addr; |
1903 | break; | 1903 | break; |
1904 | 1904 | ||
1905 | case PR_SET_MM_BRK: | 1905 | case PR_SET_MM_BRK: |
1906 | if (addr <= mm->end_data) | 1906 | if (addr <= mm->end_data) |
1907 | goto out; | 1907 | goto out; |
1908 | 1908 | ||
1909 | if (rlim < RLIM_INFINITY && | 1909 | if (rlim < RLIM_INFINITY && |
1910 | (addr - mm->start_brk) + | 1910 | (addr - mm->start_brk) + |
1911 | (mm->end_data - mm->start_data) > rlim) | 1911 | (mm->end_data - mm->start_data) > rlim) |
1912 | goto out; | 1912 | goto out; |
1913 | 1913 | ||
1914 | mm->brk = addr; | 1914 | mm->brk = addr; |
1915 | break; | 1915 | break; |
1916 | 1916 | ||
1917 | /* | 1917 | /* |
1918 | * If command line arguments and environment | 1918 | * If command line arguments and environment |
1919 | * are placed somewhere else on stack, we can | 1919 | * are placed somewhere else on stack, we can |
1920 | * set them up here, ARG_START/END to setup | 1920 | * set them up here, ARG_START/END to setup |
1921 | * command line arguments and ENV_START/END | 1921 | * command line arguments and ENV_START/END |
1922 | * for environment. | 1922 | * for environment. |
1923 | */ | 1923 | */ |
1924 | case PR_SET_MM_START_STACK: | 1924 | case PR_SET_MM_START_STACK: |
1925 | case PR_SET_MM_ARG_START: | 1925 | case PR_SET_MM_ARG_START: |
1926 | case PR_SET_MM_ARG_END: | 1926 | case PR_SET_MM_ARG_END: |
1927 | case PR_SET_MM_ENV_START: | 1927 | case PR_SET_MM_ENV_START: |
1928 | case PR_SET_MM_ENV_END: | 1928 | case PR_SET_MM_ENV_END: |
1929 | if (!vma) { | 1929 | if (!vma) { |
1930 | error = -EFAULT; | 1930 | error = -EFAULT; |
1931 | goto out; | 1931 | goto out; |
1932 | } | 1932 | } |
1933 | if (opt == PR_SET_MM_START_STACK) | 1933 | if (opt == PR_SET_MM_START_STACK) |
1934 | mm->start_stack = addr; | 1934 | mm->start_stack = addr; |
1935 | else if (opt == PR_SET_MM_ARG_START) | 1935 | else if (opt == PR_SET_MM_ARG_START) |
1936 | mm->arg_start = addr; | 1936 | mm->arg_start = addr; |
1937 | else if (opt == PR_SET_MM_ARG_END) | 1937 | else if (opt == PR_SET_MM_ARG_END) |
1938 | mm->arg_end = addr; | 1938 | mm->arg_end = addr; |
1939 | else if (opt == PR_SET_MM_ENV_START) | 1939 | else if (opt == PR_SET_MM_ENV_START) |
1940 | mm->env_start = addr; | 1940 | mm->env_start = addr; |
1941 | else if (opt == PR_SET_MM_ENV_END) | 1941 | else if (opt == PR_SET_MM_ENV_END) |
1942 | mm->env_end = addr; | 1942 | mm->env_end = addr; |
1943 | break; | 1943 | break; |
1944 | 1944 | ||
1945 | /* | 1945 | /* |
1946 | * This doesn't move auxiliary vector itself | 1946 | * This doesn't move auxiliary vector itself |
1947 | * since it's pinned to mm_struct, but allow | 1947 | * since it's pinned to mm_struct, but allow |
1948 | * to fill vector with new values. It's up | 1948 | * to fill vector with new values. It's up |
1949 | * to a caller to provide sane values here | 1949 | * to a caller to provide sane values here |
1950 | * otherwise user space tools which use this | 1950 | * otherwise user space tools which use this |
1951 | * vector might be unhappy. | 1951 | * vector might be unhappy. |
1952 | */ | 1952 | */ |
1953 | case PR_SET_MM_AUXV: { | 1953 | case PR_SET_MM_AUXV: { |
1954 | unsigned long user_auxv[AT_VECTOR_SIZE]; | 1954 | unsigned long user_auxv[AT_VECTOR_SIZE]; |
1955 | 1955 | ||
1956 | if (arg4 > sizeof(user_auxv)) | 1956 | if (arg4 > sizeof(user_auxv)) |
1957 | goto out; | 1957 | goto out; |
1958 | up_read(&mm->mmap_sem); | 1958 | up_read(&mm->mmap_sem); |
1959 | 1959 | ||
1960 | if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) | 1960 | if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) |
1961 | return -EFAULT; | 1961 | return -EFAULT; |
1962 | 1962 | ||
1963 | /* Make sure the last entry is always AT_NULL */ | 1963 | /* Make sure the last entry is always AT_NULL */ |
1964 | user_auxv[AT_VECTOR_SIZE - 2] = 0; | 1964 | user_auxv[AT_VECTOR_SIZE - 2] = 0; |
1965 | user_auxv[AT_VECTOR_SIZE - 1] = 0; | 1965 | user_auxv[AT_VECTOR_SIZE - 1] = 0; |
1966 | 1966 | ||
1967 | BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); | 1967 | BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); |
1968 | 1968 | ||
1969 | task_lock(current); | 1969 | task_lock(current); |
1970 | memcpy(mm->saved_auxv, user_auxv, arg4); | 1970 | memcpy(mm->saved_auxv, user_auxv, arg4); |
1971 | task_unlock(current); | 1971 | task_unlock(current); |
1972 | 1972 | ||
1973 | return 0; | 1973 | return 0; |
1974 | } | 1974 | } |
1975 | default: | 1975 | default: |
1976 | goto out; | 1976 | goto out; |
1977 | } | 1977 | } |
1978 | 1978 | ||
1979 | error = 0; | 1979 | error = 0; |
1980 | out: | 1980 | out: |
1981 | up_read(&mm->mmap_sem); | 1981 | up_read(&mm->mmap_sem); |
1982 | return error; | 1982 | return error; |
1983 | } | 1983 | } |
1984 | 1984 | ||
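The remaining PR_SET_MM options above overwrite the mm_struct fields that /proc/<pid>/stat and /proc/<pid>/cmdline are derived from. A hedged sketch (same assumptions: CAP_SYS_RESOURCE, CONFIG_CHECKPOINT_RESTORE, headers with the PR_SET_MM_* constants) that moves the command-line window into a freshly mapped buffer, in the spirit of the ARG_START/ARG_END comment above:

/* Sketch: relocate what /proc/self/cmdline reads from, as restore or
 * setproctitle-style tools do. Both ends must fall inside a mapped area,
 * which is what the find_vma() check in prctl_set_mm() verifies. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>

int main(void)
{
	static const char title[] = "demo-title";
	char *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memcpy(buf, title, sizeof(title));

	if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long)buf, 0, 0) ||
	    prctl(PR_SET_MM, PR_SET_MM_ARG_END,
		  (unsigned long)(buf + sizeof(title)), 0, 0))
		perror("prctl(PR_SET_MM)");
	return 0;
}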
1985 | static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) | 1985 | static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) |
1986 | { | 1986 | { |
1987 | return put_user(me->clear_child_tid, tid_addr); | 1987 | return put_user(me->clear_child_tid, tid_addr); |
1988 | } | 1988 | } |
1989 | 1989 | ||
1990 | #else /* CONFIG_CHECKPOINT_RESTORE */ | 1990 | #else /* CONFIG_CHECKPOINT_RESTORE */ |
1991 | static int prctl_set_mm(int opt, unsigned long addr, | 1991 | static int prctl_set_mm(int opt, unsigned long addr, |
1992 | unsigned long arg4, unsigned long arg5) | 1992 | unsigned long arg4, unsigned long arg5) |
1993 | { | 1993 | { |
1994 | return -EINVAL; | 1994 | return -EINVAL; |
1995 | } | 1995 | } |
1996 | static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) | 1996 | static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) |
1997 | { | 1997 | { |
1998 | return -EINVAL; | 1998 | return -EINVAL; |
1999 | } | 1999 | } |
2000 | #endif | 2000 | #endif |
2001 | 2001 | ||
2002 | SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | 2002 | SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, |
2003 | unsigned long, arg4, unsigned long, arg5) | 2003 | unsigned long, arg4, unsigned long, arg5) |
2004 | { | 2004 | { |
2005 | struct task_struct *me = current; | 2005 | struct task_struct *me = current; |
2006 | unsigned char comm[sizeof(me->comm)]; | 2006 | unsigned char comm[sizeof(me->comm)]; |
2007 | long error; | 2007 | long error; |
2008 | 2008 | ||
2009 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); | 2009 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); |
2010 | if (error != -ENOSYS) | 2010 | if (error != -ENOSYS) |
2011 | return error; | 2011 | return error; |
2012 | 2012 | ||
2013 | error = 0; | 2013 | error = 0; |
2014 | switch (option) { | 2014 | switch (option) { |
2015 | case PR_SET_PDEATHSIG: | 2015 | case PR_SET_PDEATHSIG: |
2016 | if (!valid_signal(arg2)) { | 2016 | if (!valid_signal(arg2)) { |
2017 | error = -EINVAL; | 2017 | error = -EINVAL; |
2018 | break; | 2018 | break; |
2019 | } | 2019 | } |
2020 | me->pdeath_signal = arg2; | 2020 | me->pdeath_signal = arg2; |
2021 | break; | 2021 | break; |
2022 | case PR_GET_PDEATHSIG: | 2022 | case PR_GET_PDEATHSIG: |
2023 | error = put_user(me->pdeath_signal, (int __user *)arg2); | 2023 | error = put_user(me->pdeath_signal, (int __user *)arg2); |
2024 | break; | 2024 | break; |
2025 | case PR_GET_DUMPABLE: | 2025 | case PR_GET_DUMPABLE: |
2026 | error = get_dumpable(me->mm); | 2026 | error = get_dumpable(me->mm); |
2027 | break; | 2027 | break; |
2028 | case PR_SET_DUMPABLE: | 2028 | case PR_SET_DUMPABLE: |
2029 | if (arg2 < 0 || arg2 > 1) { | 2029 | if (arg2 < 0 || arg2 > 1) { |
2030 | error = -EINVAL; | 2030 | error = -EINVAL; |
2031 | break; | 2031 | break; |
2032 | } | 2032 | } |
2033 | set_dumpable(me->mm, arg2); | 2033 | set_dumpable(me->mm, arg2); |
2034 | break; | 2034 | break; |
2035 | 2035 | ||
2036 | case PR_SET_UNALIGN: | 2036 | case PR_SET_UNALIGN: |
2037 | error = SET_UNALIGN_CTL(me, arg2); | 2037 | error = SET_UNALIGN_CTL(me, arg2); |
2038 | break; | 2038 | break; |
2039 | case PR_GET_UNALIGN: | 2039 | case PR_GET_UNALIGN: |
2040 | error = GET_UNALIGN_CTL(me, arg2); | 2040 | error = GET_UNALIGN_CTL(me, arg2); |
2041 | break; | 2041 | break; |
2042 | case PR_SET_FPEMU: | 2042 | case PR_SET_FPEMU: |
2043 | error = SET_FPEMU_CTL(me, arg2); | 2043 | error = SET_FPEMU_CTL(me, arg2); |
2044 | break; | 2044 | break; |
2045 | case PR_GET_FPEMU: | 2045 | case PR_GET_FPEMU: |
2046 | error = GET_FPEMU_CTL(me, arg2); | 2046 | error = GET_FPEMU_CTL(me, arg2); |
2047 | break; | 2047 | break; |
2048 | case PR_SET_FPEXC: | 2048 | case PR_SET_FPEXC: |
2049 | error = SET_FPEXC_CTL(me, arg2); | 2049 | error = SET_FPEXC_CTL(me, arg2); |
2050 | break; | 2050 | break; |
2051 | case PR_GET_FPEXC: | 2051 | case PR_GET_FPEXC: |
2052 | error = GET_FPEXC_CTL(me, arg2); | 2052 | error = GET_FPEXC_CTL(me, arg2); |
2053 | break; | 2053 | break; |
2054 | case PR_GET_TIMING: | 2054 | case PR_GET_TIMING: |
2055 | error = PR_TIMING_STATISTICAL; | 2055 | error = PR_TIMING_STATISTICAL; |
2056 | break; | 2056 | break; |
2057 | case PR_SET_TIMING: | 2057 | case PR_SET_TIMING: |
2058 | if (arg2 != PR_TIMING_STATISTICAL) | 2058 | if (arg2 != PR_TIMING_STATISTICAL) |
2059 | error = -EINVAL; | 2059 | error = -EINVAL; |
2060 | break; | 2060 | break; |
2061 | case PR_SET_NAME: | 2061 | case PR_SET_NAME: |
2062 | comm[sizeof(me->comm)-1] = 0; | 2062 | comm[sizeof(me->comm)-1] = 0; |
2063 | if (strncpy_from_user(comm, (char __user *)arg2, | 2063 | if (strncpy_from_user(comm, (char __user *)arg2, |
2064 | sizeof(me->comm) - 1) < 0) | 2064 | sizeof(me->comm) - 1) < 0) |
2065 | return -EFAULT; | 2065 | return -EFAULT; |
2066 | set_task_comm(me, comm); | 2066 | set_task_comm(me, comm); |
2067 | proc_comm_connector(me); | 2067 | proc_comm_connector(me); |
2068 | break; | 2068 | break; |
2069 | case PR_GET_NAME: | 2069 | case PR_GET_NAME: |
2070 | get_task_comm(comm, me); | 2070 | get_task_comm(comm, me); |
2071 | if (copy_to_user((char __user *)arg2, comm, | 2071 | if (copy_to_user((char __user *)arg2, comm, |
2072 | sizeof(comm))) | 2072 | sizeof(comm))) |
2073 | return -EFAULT; | 2073 | return -EFAULT; |
2074 | break; | 2074 | break; |
2075 | case PR_GET_ENDIAN: | 2075 | case PR_GET_ENDIAN: |
2076 | error = GET_ENDIAN(me, arg2); | 2076 | error = GET_ENDIAN(me, arg2); |
2077 | break; | 2077 | break; |
2078 | case PR_SET_ENDIAN: | 2078 | case PR_SET_ENDIAN: |
2079 | error = SET_ENDIAN(me, arg2); | 2079 | error = SET_ENDIAN(me, arg2); |
2080 | break; | 2080 | break; |
2081 | case PR_GET_SECCOMP: | 2081 | case PR_GET_SECCOMP: |
2082 | error = prctl_get_seccomp(); | 2082 | error = prctl_get_seccomp(); |
2083 | break; | 2083 | break; |
2084 | case PR_SET_SECCOMP: | 2084 | case PR_SET_SECCOMP: |
2085 | error = prctl_set_seccomp(arg2, (char __user *)arg3); | 2085 | error = prctl_set_seccomp(arg2, (char __user *)arg3); |
2086 | break; | 2086 | break; |
2087 | case PR_GET_TSC: | 2087 | case PR_GET_TSC: |
2088 | error = GET_TSC_CTL(arg2); | 2088 | error = GET_TSC_CTL(arg2); |
2089 | break; | 2089 | break; |
2090 | case PR_SET_TSC: | 2090 | case PR_SET_TSC: |
2091 | error = SET_TSC_CTL(arg2); | 2091 | error = SET_TSC_CTL(arg2); |
2092 | break; | 2092 | break; |
2093 | case PR_TASK_PERF_EVENTS_DISABLE: | 2093 | case PR_TASK_PERF_EVENTS_DISABLE: |
2094 | error = perf_event_task_disable(); | 2094 | error = perf_event_task_disable(); |
2095 | break; | 2095 | break; |
2096 | case PR_TASK_PERF_EVENTS_ENABLE: | 2096 | case PR_TASK_PERF_EVENTS_ENABLE: |
2097 | error = perf_event_task_enable(); | 2097 | error = perf_event_task_enable(); |
2098 | break; | 2098 | break; |
2099 | case PR_GET_TIMERSLACK: | 2099 | case PR_GET_TIMERSLACK: |
2100 | error = current->timer_slack_ns; | 2100 | error = current->timer_slack_ns; |
2101 | break; | 2101 | break; |
2102 | case PR_SET_TIMERSLACK: | 2102 | case PR_SET_TIMERSLACK: |
2103 | if (arg2 <= 0) | 2103 | if (arg2 <= 0) |
2104 | current->timer_slack_ns = | 2104 | current->timer_slack_ns = |
2105 | current->default_timer_slack_ns; | 2105 | current->default_timer_slack_ns; |
2106 | else | 2106 | else |
2107 | current->timer_slack_ns = arg2; | 2107 | current->timer_slack_ns = arg2; |
2108 | break; | 2108 | break; |
2109 | case PR_MCE_KILL: | 2109 | case PR_MCE_KILL: |
2110 | if (arg4 | arg5) | 2110 | if (arg4 | arg5) |
2111 | return -EINVAL; | 2111 | return -EINVAL; |
2112 | switch (arg2) { | 2112 | switch (arg2) { |
2113 | case PR_MCE_KILL_CLEAR: | 2113 | case PR_MCE_KILL_CLEAR: |
2114 | if (arg3 != 0) | 2114 | if (arg3 != 0) |
2115 | return -EINVAL; | 2115 | return -EINVAL; |
2116 | current->flags &= ~PF_MCE_PROCESS; | 2116 | current->flags &= ~PF_MCE_PROCESS; |
2117 | break; | 2117 | break; |
2118 | case PR_MCE_KILL_SET: | 2118 | case PR_MCE_KILL_SET: |
2119 | current->flags |= PF_MCE_PROCESS; | 2119 | current->flags |= PF_MCE_PROCESS; |
2120 | if (arg3 == PR_MCE_KILL_EARLY) | 2120 | if (arg3 == PR_MCE_KILL_EARLY) |
2121 | current->flags |= PF_MCE_EARLY; | 2121 | current->flags |= PF_MCE_EARLY; |
2122 | else if (arg3 == PR_MCE_KILL_LATE) | 2122 | else if (arg3 == PR_MCE_KILL_LATE) |
2123 | current->flags &= ~PF_MCE_EARLY; | 2123 | current->flags &= ~PF_MCE_EARLY; |
2124 | else if (arg3 == PR_MCE_KILL_DEFAULT) | 2124 | else if (arg3 == PR_MCE_KILL_DEFAULT) |
2125 | current->flags &= | 2125 | current->flags &= |
2126 | ~(PF_MCE_EARLY|PF_MCE_PROCESS); | 2126 | ~(PF_MCE_EARLY|PF_MCE_PROCESS); |
2127 | else | 2127 | else |
2128 | return -EINVAL; | 2128 | return -EINVAL; |
2129 | break; | 2129 | break; |
2130 | default: | 2130 | default: |
2131 | return -EINVAL; | 2131 | return -EINVAL; |
2132 | } | 2132 | } |
2133 | break; | 2133 | break; |
2134 | case PR_MCE_KILL_GET: | 2134 | case PR_MCE_KILL_GET: |
2135 | if (arg2 | arg3 | arg4 | arg5) | 2135 | if (arg2 | arg3 | arg4 | arg5) |
2136 | return -EINVAL; | 2136 | return -EINVAL; |
2137 | if (current->flags & PF_MCE_PROCESS) | 2137 | if (current->flags & PF_MCE_PROCESS) |
2138 | error = (current->flags & PF_MCE_EARLY) ? | 2138 | error = (current->flags & PF_MCE_EARLY) ? |
2139 | PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; | 2139 | PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; |
2140 | else | 2140 | else |
2141 | error = PR_MCE_KILL_DEFAULT; | 2141 | error = PR_MCE_KILL_DEFAULT; |
2142 | break; | 2142 | break; |
2143 | case PR_SET_MM: | 2143 | case PR_SET_MM: |
2144 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | 2144 | error = prctl_set_mm(arg2, arg3, arg4, arg5); |
2145 | break; | 2145 | break; |
2146 | case PR_GET_TID_ADDRESS: | 2146 | case PR_GET_TID_ADDRESS: |
2147 | error = prctl_get_tid_address(me, (int __user **)arg2); | 2147 | error = prctl_get_tid_address(me, (int __user **)arg2); |
2148 | break; | 2148 | break; |
2149 | case PR_SET_CHILD_SUBREAPER: | 2149 | case PR_SET_CHILD_SUBREAPER: |
2150 | me->signal->is_child_subreaper = !!arg2; | 2150 | me->signal->is_child_subreaper = !!arg2; |
2151 | break; | 2151 | break; |
2152 | case PR_GET_CHILD_SUBREAPER: | 2152 | case PR_GET_CHILD_SUBREAPER: |
2153 | error = put_user(me->signal->is_child_subreaper, | 2153 | error = put_user(me->signal->is_child_subreaper, |
2154 | (int __user *) arg2); | 2154 | (int __user *) arg2); |
2155 | break; | 2155 | break; |
2156 | case PR_SET_NO_NEW_PRIVS: | 2156 | case PR_SET_NO_NEW_PRIVS: |
2157 | if (arg2 != 1 || arg3 || arg4 || arg5) | 2157 | if (arg2 != 1 || arg3 || arg4 || arg5) |
2158 | return -EINVAL; | 2158 | return -EINVAL; |
2159 | 2159 | ||
2160 | current->no_new_privs = 1; | 2160 | current->no_new_privs = 1; |
2161 | break; | 2161 | break; |
2162 | case PR_GET_NO_NEW_PRIVS: | 2162 | case PR_GET_NO_NEW_PRIVS: |
2163 | if (arg2 || arg3 || arg4 || arg5) | 2163 | if (arg2 || arg3 || arg4 || arg5) |
2164 | return -EINVAL; | 2164 | return -EINVAL; |
2165 | return current->no_new_privs ? 1 : 0; | 2165 | return current->no_new_privs ? 1 : 0; |
2166 | default: | 2166 | default: |
2167 | error = -EINVAL; | 2167 | error = -EINVAL; |
2168 | break; | 2168 | break; |
2169 | } | 2169 | } |
2170 | return error; | 2170 | return error; |
2171 | } | 2171 | } |
2172 | 2172 | ||
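Most of the prctl() options dispatched above are per-task toggles; a short sketch (plain prctl(2) API; PR_SET_NO_NEW_PRIVS and PR_GET_NO_NEW_PRIVS assume Linux 3.5+ headers) exercising the name and no_new_privs cases:

/* Sketch: task_struct::comm holds 15 characters plus a NUL, which is why
 * the name below fits exactly and the read-back buffer is 16 bytes. */
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	char comm[16] = "";

	if (prctl(PR_SET_NAME, (unsigned long)"worker-thread-0", 0, 0, 0) != 0)
		perror("PR_SET_NAME");
	if (prctl(PR_GET_NAME, (unsigned long)comm, 0, 0, 0) == 0)
		printf("comm = \"%s\"\n", comm);

	/* One-way switch: arg2 must be 1 and the rest zero, exactly as the
	 * PR_SET_NO_NEW_PRIVS case above requires. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0)
		perror("PR_SET_NO_NEW_PRIVS");
	printf("no_new_privs = %d\n", (int)prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0));
	return 0;
}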
2173 | SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, | 2173 | SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, |
2174 | struct getcpu_cache __user *, unused) | 2174 | struct getcpu_cache __user *, unused) |
2175 | { | 2175 | { |
2176 | int err = 0; | 2176 | int err = 0; |
2177 | int cpu = raw_smp_processor_id(); | 2177 | int cpu = raw_smp_processor_id(); |
2178 | if (cpup) | 2178 | if (cpup) |
2179 | err |= put_user(cpu, cpup); | 2179 | err |= put_user(cpu, cpup); |
2180 | if (nodep) | 2180 | if (nodep) |
2181 | err |= put_user(cpu_to_node(cpu), nodep); | 2181 | err |= put_user(cpu_to_node(cpu), nodep); |
2182 | return err ? -EFAULT : 0; | 2182 | return err ? -EFAULT : 0; |
2183 | } | 2183 | } |
2184 | 2184 | ||
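Older glibc versions have no dedicated getcpu() wrapper, so the sketch below goes through syscall(2); the third argument is the unused getcpu_cache pointer seen in the definition above and may be NULL:

/* Sketch: ask the kernel which CPU and NUMA node we are running on.
 * The values come from raw_smp_processor_id() and cpu_to_node() above
 * and may already be stale by the time they are printed. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	unsigned int cpu = 0, node = 0;

	if (syscall(SYS_getcpu, &cpu, &node, NULL) != 0) {
		perror("getcpu");
		return 1;
	}
	printf("running on cpu %u, node %u\n", cpu, node);
	return 0;
}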
2185 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; | 2185 | char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; |
2186 | 2186 | ||
2187 | static void argv_cleanup(struct subprocess_info *info) | 2187 | static void argv_cleanup(struct subprocess_info *info) |
2188 | { | 2188 | { |
2189 | argv_free(info->argv); | 2189 | argv_free(info->argv); |
2190 | } | 2190 | } |
2191 | 2191 | ||
2192 | static int __orderly_poweroff(void) | 2192 | static int __orderly_poweroff(void) |
2193 | { | 2193 | { |
2194 | int argc; | 2194 | int argc; |
2195 | char **argv; | 2195 | char **argv; |
2196 | static char *envp[] = { | 2196 | static char *envp[] = { |
2197 | "HOME=/", | 2197 | "HOME=/", |
2198 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | 2198 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", |
2199 | NULL | 2199 | NULL |
2200 | }; | 2200 | }; |
2201 | int ret; | 2201 | int ret; |
2202 | 2202 | ||
2203 | argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); | 2203 | argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); |
2204 | if (argv == NULL) { | 2204 | if (argv == NULL) { |
2205 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | 2205 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", |
2206 | __func__, poweroff_cmd); | 2206 | __func__, poweroff_cmd); |
2207 | return -ENOMEM; | 2207 | return -ENOMEM; |
2208 | } | 2208 | } |
2209 | 2209 | ||
2210 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, | 2210 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, |
2211 | NULL, argv_cleanup, NULL); | 2211 | NULL, argv_cleanup, NULL); |
2212 | if (ret == -ENOMEM) | 2212 | if (ret == -ENOMEM) |
2213 | argv_free(argv); | 2213 | argv_free(argv); |
2214 | 2214 | ||
2215 | return ret; | 2215 | return ret; |
2216 | } | 2216 | } |
2217 | 2217 | ||
2218 | /** | 2218 | /** |
2219 | * orderly_poweroff - Trigger an orderly system poweroff | 2219 | * orderly_poweroff - Trigger an orderly system poweroff |
2220 | * @force: force poweroff if command execution fails | 2220 | * @force: force poweroff if command execution fails |
2221 | * | 2221 | * |
2222 | * This may be called from any context to trigger a system shutdown. | 2222 | * This may be called from any context to trigger a system shutdown. |
2223 | * If the orderly shutdown fails, it will force an immediate shutdown. | 2223 | * If the orderly shutdown fails, it will force an immediate shutdown. |
2224 | */ | 2224 | */ |
2225 | int orderly_poweroff(bool force) | 2225 | int orderly_poweroff(bool force) |
2226 | { | 2226 | { |
2227 | int ret = __orderly_poweroff(); | 2227 | int ret = __orderly_poweroff(); |
2228 | 2228 | ||
2229 | if (ret && force) { | 2229 | if (ret && force) { |
2230 | printk(KERN_WARNING "Failed to start orderly shutdown: " | 2230 | printk(KERN_WARNING "Failed to start orderly shutdown: " |
2231 | "forcing the issue\n"); | 2231 | "forcing the issue\n"); |
2232 | 2232 | ||
2233 | /* | 2233 | /* |
2234 | * I guess this should try to kick off some daemon to sync and | 2234 | * I guess this should try to kick off some daemon to sync and |
2235 | * poweroff asap. Or not even bother syncing if we're doing an | 2235 | * poweroff asap. Or not even bother syncing if we're doing an |
2236 | * emergency shutdown? | 2236 | * emergency shutdown? |
2237 | */ | 2237 | */ |
2238 | emergency_sync(); | 2238 | emergency_sync(); |
2239 | kernel_power_off(); | 2239 | kernel_power_off(); |
2240 | } | 2240 | } |
2241 | 2241 | ||
2242 | return ret; | 2242 | return ret; |
2243 | } | 2243 | } |
2244 | EXPORT_SYMBOL_GPL(orderly_poweroff); | 2244 | EXPORT_SYMBOL_GPL(orderly_poweroff); |
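orderly_poweroff() is exported for in-kernel users such as thermal or hardware-monitoring drivers that need to shut the machine down cleanly. A hedged sketch of such a caller follows; the driver, device and threshold are hypothetical, and only the orderly_poweroff(bool force) call itself comes from the code above:

/* Sketch: hypothetical over-temperature handler in a driver. force=true
 * means "fall back to kernel_power_off() if the userspace helper fails",
 * matching the behaviour implemented above. */
#include <linux/kernel.h>
#include <linux/reboot.h>

#define DEMO_CRIT_TEMP_MILLICELSIUS	105000	/* made-up limit */

static void demo_handle_overtemp(int temp_millicelsius)
{
	if (temp_millicelsius < DEMO_CRIT_TEMP_MILLICELSIUS)
		return;

	pr_crit("demo: %d mC exceeds critical limit, requesting poweroff\n",
		temp_millicelsius);
	orderly_poweroff(true);
}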
2245 | 2245 |