/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/seccomp.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/kthread.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
#include <linux/tty.h>
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>
#include <linux/signalfd.h>
#include <linux/uprobes.h>
#include <linux/aio.h>
#include <linux/compiler.h>
#include <linux/sysctl.h>
#include <linux/kcov.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/task.h>
/*
 * Minimum number of threads to boot the kernel
 */
#define MIN_THREADS 20

/*
 * Maximum number of threads
 */
#define MAX_THREADS FUTEX_TID_MASK

/*
 * Protected counters by write_lock_irq(&tasklist_lock)
 */
unsigned long total_forks;	/* Handle normal Linux uptimes. */
int nr_threads;			/* The idle threads do not count.. */

int max_threads;		/* tunable limit on nr_threads */

DEFINE_PER_CPU(unsigned long, process_counts) = 0;

__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */

#ifdef CONFIG_PROVE_RCU
int lockdep_tasklist_lock_is_held(void)
{
	return lockdep_is_held(&tasklist_lock);
}
EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
#endif /* #ifdef CONFIG_PROVE_RCU */
int nr_processes(void)
{
	int cpu;
	int total = 0;

	for_each_possible_cpu(cpu)
		total += per_cpu(process_counts, cpu);

	return total;
}

void __weak arch_release_task_struct(struct task_struct *tsk)
{
}
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
static struct kmem_cache *task_struct_cachep;

static inline struct task_struct *alloc_task_struct_node(int node)
{
	return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
}

static inline void free_task_struct(struct task_struct *tsk)
{
	kmem_cache_free(task_struct_cachep, tsk);
}
#endif
void __weak arch_release_thread_stack(unsigned long *stack)
{
}

#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR

/*
 * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
 * kmemcache based allocator.
 */
# if THREAD_SIZE >= PAGE_SIZE
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
					      int node)
{
	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
					     THREAD_SIZE_ORDER);

	return page ? page_address(page) : NULL;
}

static inline void free_thread_stack(unsigned long *stack)
{
	__free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_stack_cache;

static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
					      int node)
{
	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
}

static void free_thread_stack(unsigned long *stack)
{
	kmem_cache_free(thread_stack_cache, stack);
}

void thread_stack_cache_init(void)
{
	thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
					       THREAD_SIZE, 0, NULL);
	BUG_ON(thread_stack_cache == NULL);
}
# endif
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;

/* SLAB cache for sighand_struct structures (tsk->sighand) */
struct kmem_cache *sighand_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
struct kmem_cache *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
struct kmem_cache *fs_cachep;

/* SLAB cache for vm_area_struct structures */
struct kmem_cache *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
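/*
 * Kernel stacks are accounted in kilobytes against the owning zone and
 * memory cgroup below; account is +1 when a stack is allocated and -1
 * when it is freed.
 */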
static void account_kernel_stack(unsigned long *stack, int account)
{
	/* All stack pages are in the same zone and belong to the same memcg. */
	struct page *first_page = virt_to_page(stack);

	mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
			    THREAD_SIZE / 1024 * account);

	memcg_kmem_update_page_stat(
		first_page, MEMCG_KERNEL_STACK_KB,
		account * (THREAD_SIZE / 1024));
}
void free_task(struct task_struct *tsk)
{
	account_kernel_stack(tsk->stack, -1);
	arch_release_thread_stack(tsk->stack);
	free_thread_stack(tsk->stack);
	rt_mutex_debug_task_free(tsk);
	ftrace_graph_exit_task(tsk);
	put_seccomp_filter(tsk);
	arch_release_task_struct(tsk);
	free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
static inline void free_signal_struct(struct signal_struct *sig)
{
	taskstats_tgid_free(sig);
	sched_autogroup_exit(sig);
	kmem_cache_free(signal_cachep, sig);
}

static inline void put_signal_struct(struct signal_struct *sig)
{
	if (atomic_dec_and_test(&sig->sigcnt))
		free_signal_struct(sig);
}
void __put_task_struct(struct task_struct *tsk)
{
	WARN_ON(!tsk->exit_state);
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);

	cgroup_free(tsk);
	task_numa_free(tsk);
	security_task_free(tsk);
	exit_creds(tsk);
	delayacct_tsk_free(tsk);
	put_signal_struct(tsk->signal);

	if (!profile_handoff_task(tsk))
		free_task(tsk);
}
EXPORT_SYMBOL_GPL(__put_task_struct);
void __init __weak arch_task_cache_init(void) { }
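/*
 * Worked example: with 4 GiB of RAM and 8 KiB stacks (THREAD_SIZE == 8192),
 * the computation below gives 2^32 / (2^13 * 8) = 65536 threads, i.e. at
 * most roughly one eighth of memory may be consumed by thread structures.
 */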
/*
 * set_max_threads
 */
static void set_max_threads(unsigned int max_threads_suggested)
{
	u64 threads;

	/*
	 * The number of threads shall be limited such that the thread
	 * structures may only consume a small part of the available memory.
	 */
	if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
		threads = MAX_THREADS;
	else
		threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
				    (u64) THREAD_SIZE * 8UL);

	if (threads > max_threads_suggested)
		threads = max_threads_suggested;

	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
/* Initialized by the architecture: */
int arch_task_struct_size __read_mostly;
#endif

void __init fork_init(void)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
#endif
	/* create a slab on which task_structs can be allocated */
	task_struct_cachep = kmem_cache_create("task_struct",
			arch_task_struct_size, ARCH_MIN_TASKALIGN,
			SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
#endif

	/* do the arch specific task caches init */
	arch_task_cache_init();

	set_max_threads(MAX_THREADS);

	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
	init_task.signal->rlim[RLIMIT_SIGPENDING] =
		init_task.signal->rlim[RLIMIT_NPROC];
}
int __weak arch_dup_task_struct(struct task_struct *dst,
					       struct task_struct *src)
{
	*dst = *src;
	return 0;
}

void set_task_stack_end_magic(struct task_struct *tsk)
{
	unsigned long *stackend;

	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;	/* for overflow detection */
}
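/*
 * Allocate a new task_struct plus kernel stack on the requested NUMA node
 * and copy the parent's contents into them; the child is not yet runnable
 * when this returns.
 */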
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
	struct task_struct *tsk;
	unsigned long *stack;
	int err;

	if (node == NUMA_NO_NODE)
		node = tsk_fork_get_node(orig);
	tsk = alloc_task_struct_node(node);
	if (!tsk)
		return NULL;

	stack = alloc_thread_stack_node(tsk, node);
	if (!stack)
		goto free_tsk;

	err = arch_dup_task_struct(tsk, orig);
	if (err)
		goto free_stack;

	tsk->stack = stack;
#ifdef CONFIG_SECCOMP
	/*
	 * We must handle setting up seccomp filters once we're under
	 * the sighand lock in case orig has changed between now and
	 * then. Until then, filter must be NULL to avoid messing up
	 * the usage counts on the error path calling free_task.
	 */
	tsk->seccomp.filter = NULL;
#endif

	setup_thread_stack(tsk, orig);
	clear_user_return_notifier(tsk);
	clear_tsk_need_resched(tsk);
	set_task_stack_end_magic(tsk);

#ifdef CONFIG_CC_STACKPROTECTOR
	tsk->stack_canary = get_random_int();
#endif

	/*
	 * One for us, one for whoever does the "release_task()" (usually
	 * parent)
	 */
	atomic_set(&tsk->usage, 2);
#ifdef CONFIG_BLK_DEV_IO_TRACE
	tsk->btrace_seq = 0;
#endif
	tsk->splice_pipe = NULL;
	tsk->task_frag.page = NULL;
	tsk->wake_q.next = NULL;

	account_kernel_stack(stack, 1);

	kcov_task_init(tsk);

	return tsk;

free_stack:
	free_thread_stack(stack);
free_tsk:
	free_task_struct(tsk);
	return NULL;
}

#ifdef CONFIG_MMU
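/*
 * Duplicate the parent's address space: walk its VMA list, cloning each
 * mapping (mempolicy, anon_vma chain, file reference, rbtree linkage)
 * and copying the underlying page table entries via copy_page_range().
 */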
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
	struct rb_node **rb_link, *rb_parent;
	int retval;
	unsigned long charge;

	uprobe_start_dup_mmap();
	if (down_write_killable(&oldmm->mmap_sem)) {
		retval = -EINTR;
		goto fail_uprobe_end;
	}
	flush_cache_dup_mm(oldmm);
	uprobe_dup_mmap(oldmm, mm);
	/*
	 * Not linked in yet - no deadlock potential:
	 */
	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

	/* No ordering required: file already has been exposed. */
	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
	mm->total_vm = oldmm->total_vm;
	mm->data_vm = oldmm->data_vm;
	mm->exec_vm = oldmm->exec_vm;
	mm->stack_vm = oldmm->stack_vm;
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	pprev = &mm->mmap;
	retval = ksm_fork(mm, oldmm);
	if (retval)
		goto out;
	retval = khugepaged_fork(mm, oldmm);
	if (retval)
		goto out;

	prev = NULL;
	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		struct file *file;

		if (mpnt->vm_flags & VM_DONTCOPY) {
			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
			continue;
		}
		charge = 0;
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned long len = vma_pages(mpnt);

			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
				goto fail_nomem;
			charge = len;
		}
		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		INIT_LIST_HEAD(&tmp->anon_vma_chain);
		retval = vma_dup_policy(mpnt, tmp);
		if (retval)
			goto fail_nomem_policy;
		tmp->vm_mm = mm;
		if (anon_vma_fork(tmp, mpnt))
			goto fail_nomem_anon_vma_fork;
		tmp->vm_flags &=
			~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP);
		tmp->vm_next = tmp->vm_prev = NULL;
		tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
		file = tmp->vm_file;
		if (file) {
			struct inode *inode = file_inode(file);
			struct address_space *mapping = file->f_mapping;
			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);
			i_mmap_lock_write(mapping);
			if (tmp->vm_flags & VM_SHARED)
				atomic_inc(&mapping->i_mmap_writable);
			flush_dcache_mmap_lock(mapping);
			/* insert tmp into the share list, just after mpnt */
			vma_interval_tree_insert_after(tmp, mpnt,
							&mapping->i_mmap);
			flush_dcache_mmap_unlock(mapping);
			i_mmap_unlock_write(mapping);
		}

		/*
		 * Clear hugetlb-related page reserves for children. This only
		 * affects MAP_PRIVATE mappings. Faults generated by the child
		 * are not guaranteed to succeed, even if read-only
		 */
		if (is_vm_hugetlb_page(tmp))
			reset_vma_resv_huge_pages(tmp);

		/*
		 * Link in the new vma and copy the page table entries.
		 */
		*pprev = tmp;
		pprev = &tmp->vm_next;
		tmp->vm_prev = prev;
		prev = tmp;

		__vma_link_rb(mm, tmp, rb_link, rb_parent);
		rb_link = &tmp->vm_rb.rb_right;
		rb_parent = &tmp->vm_rb;

		mm->map_count++;
		retval = copy_page_range(mm, oldmm, mpnt);

		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
			goto out;
	}
	/* a new mm has just been created */
	arch_dup_mmap(oldmm, mm);
	retval = 0;
out:
	up_write(&mm->mmap_sem);
	flush_tlb_mm(oldmm);
	up_write(&oldmm->mmap_sem);
fail_uprobe_end:
	uprobe_end_dup_mmap();
	return retval;
fail_nomem_anon_vma_fork:
	mpol_put(vma_policy(tmp));
fail_nomem_policy:
	kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
	retval = -ENOMEM;
	vm_unacct_memory(charge);
	goto out;
}
static inline int mm_alloc_pgd(struct mm_struct *mm)
{
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}

static inline void mm_free_pgd(struct mm_struct *mm)
{
	pgd_free(mm, mm->pgd);
}
#else
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	down_write(&oldmm->mmap_sem);
	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
	up_write(&oldmm->mmap_sem);
	return 0;
}
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);

#define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))

static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;

static int __init coredump_filter_setup(char *s)
{
	default_dump_filter =
		(simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
		MMF_DUMP_FILTER_MASK;
	return 1;
}

__setup("coredump_filter=", coredump_filter_setup);

#include <linux/init_task.h>
static void mm_init_aio(struct mm_struct *mm)
{
#ifdef CONFIG_AIO
	spin_lock_init(&mm->ioctx_lock);
	mm->ioctx_table = NULL;
#endif
}

static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
#ifdef CONFIG_MEMCG
	mm->owner = p;
#endif
}
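/*
 * Finish setting up a freshly zeroed (mm_alloc) or memcpy'd (dup_mm)
 * mm_struct: reference counts, locks, counters, the page directory and
 * the architecture mmu context.
 */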
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
{
	mm->mmap = NULL;
	mm->mm_rb = RB_ROOT;
	mm->vmacache_seqnum = 0;
	atomic_set(&mm->mm_users, 1);
	atomic_set(&mm->mm_count, 1);
	init_rwsem(&mm->mmap_sem);
	INIT_LIST_HEAD(&mm->mmlist);
	mm->core_state = NULL;
	atomic_long_set(&mm->nr_ptes, 0);
	mm_nr_pmds_init(mm);
	mm->map_count = 0;
	mm->locked_vm = 0;
	mm->pinned_vm = 0;
	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
	spin_lock_init(&mm->page_table_lock);
	mm_init_cpumask(mm);
	mm_init_aio(mm);
	mm_init_owner(mm, p);
	mmu_notifier_mm_init(mm);
	clear_tlb_flush_pending(mm);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
	mm->pmd_huge_pte = NULL;
#endif

	if (current->mm) {
		mm->flags = current->mm->flags & MMF_INIT_MASK;
		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
	} else {
		mm->flags = default_dump_filter;
		mm->def_flags = 0;
	}

	if (mm_alloc_pgd(mm))
		goto fail_nopgd;

	if (init_new_context(p, mm))
		goto fail_nocontext;

	return mm;

fail_nocontext:
	mm_free_pgd(mm);
fail_nopgd:
	free_mm(mm);
	return NULL;
}
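/*
 * Sanity-check an mm about to be freed: any RSS, page-table or huge-page
 * accounting still left over at this point indicates a leak.
 */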
static void check_mm(struct mm_struct *mm)
{
	int i;

	for (i = 0; i < NR_MM_COUNTERS; i++) {
		long x = atomic_long_read(&mm->rss_stat.count[i]);

		if (unlikely(x))
			printk(KERN_ALERT "BUG: Bad rss-counter state "
					  "mm:%p idx:%d val:%ld\n", mm, i, x);
	}

	if (atomic_long_read(&mm->nr_ptes))
		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
				atomic_long_read(&mm->nr_ptes));
	if (mm_nr_pmds(mm))
		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
				mm_nr_pmds(mm));

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
#endif
}
/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct *mm_alloc(void)
{
	struct mm_struct *mm;

	mm = allocate_mm();
	if (!mm)
		return NULL;

	memset(mm, 0, sizeof(*mm));
	return mm_init(mm, current);
}

/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
void __mmdrop(struct mm_struct *mm)
{
	BUG_ON(mm == &init_mm);
	mm_free_pgd(mm);
	destroy_context(mm);
	mmu_notifier_mm_destroy(mm);
	check_mm(mm);
	free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
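/*
 * Tear down the address space once the last user is gone: flush AIO and
 * KSM/THP state, unmap everything, and finally drop the mm_count
 * reference that mm_users held via mmdrop().
 */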
static inline void __mmput(struct mm_struct *mm)
{
	VM_BUG_ON(atomic_read(&mm->mm_users));

	uprobe_clear_state(mm);
	exit_aio(mm);
	ksm_exit(mm);
	khugepaged_exit(mm); /* must run before exit_mmap */
	exit_mmap(mm);
	set_mm_exe_file(mm, NULL);
	if (!list_empty(&mm->mmlist)) {
		spin_lock(&mmlist_lock);
		list_del(&mm->mmlist);
		spin_unlock(&mmlist_lock);
	}
	if (mm->binfmt)
		module_put(mm->binfmt->module);
	mmdrop(mm);
}

/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	might_sleep();
	if (atomic_dec_and_test(&mm->mm_users))
		__mmput(mm);
}
EXPORT_SYMBOL_GPL(mmput);

#ifdef CONFIG_MMU
static void mmput_async_fn(struct work_struct *work)
{
	struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
	__mmput(mm);
}

void mmput_async(struct mm_struct *mm)
{
	if (atomic_dec_and_test(&mm->mm_users)) {
		INIT_WORK(&mm->async_put_work, mmput_async_fn);
		schedule_work(&mm->async_put_work);
	}
}
#endif
/**
 * set_mm_exe_file - change a reference to the mm's executable file
 *
 * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
 *
 * Main users are mmput() and sys_execve(). Callers prevent concurrent
 * invocations: in mmput() nobody alive left, in execve task is single
 * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
 * mm->exe_file, but does so without using set_mm_exe_file() in order
 * to avoid the need for any locks.
 */
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
	struct file *old_exe_file;

	/*
	 * It is safe to dereference the exe_file without RCU as
	 * this function is only called if nobody else can access
	 * this mm -- see comment above for justification.
	 */
	old_exe_file = rcu_dereference_raw(mm->exe_file);

	if (new_exe_file)
		get_file(new_exe_file);
	rcu_assign_pointer(mm->exe_file, new_exe_file);
	if (old_exe_file)
		fput(old_exe_file);
}
/**
 * get_mm_exe_file - acquire a reference to the mm's executable file
 *
 * Returns %NULL if mm has no associated executable file.
 * User must release file via fput().
 */
struct file *get_mm_exe_file(struct mm_struct *mm)
{
	struct file *exe_file;

	rcu_read_lock();
	exe_file = rcu_dereference(mm->exe_file);
	if (exe_file && !get_file_rcu(exe_file))
		exe_file = NULL;
	rcu_read_unlock();
	return exe_file;
}
EXPORT_SYMBOL(get_mm_exe_file);
/**
 * get_task_mm - acquire a reference to the task's mm
 *
 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
 * this kernel workthread has transiently adopted a user mm with use_mm,
 * to do its AIO) is not set and if so returns a reference to it, after
 * bumping up the use count. User must release the mm via mmput()
 * after use. Typically used by /proc and ptrace.
 */
struct mm_struct *get_task_mm(struct task_struct *task)
{
	struct mm_struct *mm;

	task_lock(task);
	mm = task->mm;
	if (mm) {
		if (task->flags & PF_KTHREAD)
			mm = NULL;
		else
			atomic_inc(&mm->mm_users);
	}
	task_unlock(task);
	return mm;
}
EXPORT_SYMBOL_GPL(get_task_mm);
struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
{
	struct mm_struct *mm;
	int err;

	err = mutex_lock_killable(&task->signal->cred_guard_mutex);
	if (err)
		return ERR_PTR(err);

	mm = get_task_mm(task);
	if (mm && mm != current->mm &&
			!ptrace_may_access(task, mode)) {
		mmput(mm);
		mm = ERR_PTR(-EACCES);
	}
	mutex_unlock(&task->signal->cred_guard_mutex);

	return mm;
}
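/*
 * vfork() parents sleep on child->vfork_done; the helpers below complete
 * that waiter and also cope with the parent being killed while it waits.
 */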
static void complete_vfork_done(struct task_struct *tsk)
{
	struct completion *vfork;

	task_lock(tsk);
	vfork = tsk->vfork_done;
	if (likely(vfork)) {
		tsk->vfork_done = NULL;
		complete(vfork);
	}
	task_unlock(tsk);
}

static int wait_for_vfork_done(struct task_struct *child,
				struct completion *vfork)
{
	int killed;

	freezer_do_not_count();
	killed = wait_for_completion_killable(vfork);
	freezer_count();

	if (killed) {
		task_lock(child);
		child->vfork_done = NULL;
		task_unlock(child);
	}

	put_task_struct(child);
	return killed;
}
/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one. Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}
#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif
	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
#endif

	uprobe_free_utask(tsk);

	/* Get rid of any cached register state */
	deactivate_mm(tsk, mm);

	/*
	 * If we're exiting normally, clear a user-space tid field if
	 * requested. We leave this alone when dying by signal, to leave
	 * the value intact in a core dump, and to save the unnecessary
	 * trouble, say, a killed vfork parent shouldn't touch this mm.
	 * Userland only wants this done for a sys_exit.
	 */
	if (tsk->clear_child_tid) {
		if (!(tsk->flags & PF_SIGNALED) &&
		    atomic_read(&mm->mm_users) > 1) {
			/*
			 * We don't check the error code - if userspace has
			 * not set up a proper pointer then tough luck.
			 */
			put_user(0, tsk->clear_child_tid);
			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
					1, NULL, NULL, 0);
		}
		tsk->clear_child_tid = NULL;
	}

	/*
	 * All done, finally we can wake up parent and return this mm to him.
	 * Also kthread_stop() uses this completion for synchronization.
	 */
	if (tsk->vfork_done)
		complete_vfork_done(tsk);
}
/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
 */
static struct mm_struct *dup_mm(struct task_struct *tsk)
{
	struct mm_struct *mm, *oldmm = current->mm;
	int err;

	mm = allocate_mm();
	if (!mm)
		goto fail_nomem;

	memcpy(mm, oldmm, sizeof(*mm));

	if (!mm_init(mm, tsk))
		goto fail_nomem;

	err = dup_mmap(mm, oldmm);
	if (err)
		goto free_pt;

	mm->hiwater_rss = get_mm_rss(mm);
	mm->hiwater_vm = mm->total_vm;

	if (mm->binfmt && !try_module_get(mm->binfmt->module))
		goto free_pt;

	return mm;

free_pt:
	/* don't put binfmt in mmput, we haven't got module yet */
	mm->binfmt = NULL;
	mmput(mm);

fail_nomem:
	return NULL;
}
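/*
 * With CLONE_VM the child shares the parent's mm by taking an mm_users
 * reference; otherwise dup_mm() builds a full copy. Kernel threads have
 * no mm and return early.
 */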
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
	struct mm_struct *mm, *oldmm;
	int retval;

	tsk->min_flt = tsk->maj_flt = 0;
	tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
#endif

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * We need to steal a active VM for that..
	 */
	oldmm = current->mm;
	if (!oldmm)
		return 0;

	/* initialize the new vmacache entries */
	vmacache_flush(tsk);

	if (clone_flags & CLONE_VM) {
		atomic_inc(&oldmm->mm_users);
		mm = oldmm;
		goto good_mm;
	}

	retval = -ENOMEM;
	mm = dup_mm(tsk);
	if (!mm)
		goto fail_nomem;

good_mm:
	tsk->mm = mm;
	tsk->active_mm = mm;
	return 0;

fail_nomem:
	return retval;
}
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
{
	struct fs_struct *fs = current->fs;
	if (clone_flags & CLONE_FS) {
		/* tsk->fs is already what we want */
		spin_lock(&fs->lock);
		if (fs->in_exec) {
			spin_unlock(&fs->lock);
			return -EAGAIN;
		}
		fs->users++;
		spin_unlock(&fs->lock);
		return 0;
	}
	tsk->fs = copy_fs_struct(fs);
	if (!tsk->fs)
		return -ENOMEM;
	return 0;
}
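/*
 * CLONE_FILES shares the descriptor table by reference; otherwise the
 * table is duplicated with dup_fd().
 */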
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
	struct files_struct *oldf, *newf;
	int error = 0;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	newf = dup_fd(oldf, &error);
	if (!newf)
		goto out;

	tsk->files = newf;
	error = 0;
out:
	return error;
}
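/*
 * CLONE_IO shares the parent's io_context; otherwise a new context is
 * created that inherits only the I/O priority.
 */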
static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
#ifdef CONFIG_BLOCK
	struct io_context *ioc = current->io_context;
	struct io_context *new_ioc;

	if (!ioc)
		return 0;
	/*
	 * Share io context with parent, if CLONE_IO is set
	 */
	if (clone_flags & CLONE_IO) {
		ioc_task_link(ioc);
		tsk->io_context = ioc;
	} else if (ioprio_valid(ioc->ioprio)) {
		new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
		if (unlikely(!new_ioc))
			return -ENOMEM;

		new_ioc->ioprio = ioc->ioprio;
		put_io_context(new_ioc);
	}
#endif
	return 0;
}
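/*
 * CLONE_SIGHAND shares the signal handler table by reference; otherwise
 * the parent's sigaction array is copied into a fresh sighand_struct.
 */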
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sighand_struct *sig;

	if (clone_flags & CLONE_SIGHAND) {
		atomic_inc(&current->sighand->count);
		return 0;
	}
	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
	rcu_assign_pointer(tsk->sighand, sig);
	if (!sig)
		return -ENOMEM;

	atomic_set(&sig->count, 1);
	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
	return 0;
}
void __cleanup_sighand(struct sighand_struct *sighand)
{
	if (atomic_dec_and_test(&sighand->count)) {
		signalfd_cleanup(sighand);
		/*
		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
		 * without an RCU grace period, see __lock_task_sighand().
		 */
		kmem_cache_free(sighand_cachep, sighand);
	}
}
/*
 * Initialize POSIX timer handling for a thread group.
 */
static void posix_cpu_timers_init_group(struct signal_struct *sig)
{
	unsigned long cpu_limit;

	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
	if (cpu_limit != RLIM_INFINITY) {
		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
		sig->cputimer.running = true;
	}

	/* The timer lists. */
	INIT_LIST_HEAD(&sig->cpu_timers[0]);
	INIT_LIST_HEAD(&sig->cpu_timers[1]);
	INIT_LIST_HEAD(&sig->cpu_timers[2]);
}
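/*
 * Every thread group has one signal_struct; CLONE_THREAD joins the
 * caller's, anything else gets a zeroed one with rlimits, timers and
 * oom scores inherited from the parent.
 */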
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
	struct signal_struct *sig;

	if (clone_flags & CLONE_THREAD)
		return 0;

	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
	tsk->signal = sig;
	if (!sig)
		return -ENOMEM;

	sig->nr_threads = 1;
	atomic_set(&sig->live, 1);
	atomic_set(&sig->sigcnt, 1);

	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
	tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);

	init_waitqueue_head(&sig->wait_chldexit);
	sig->curr_target = tsk;
	init_sigpending(&sig->shared_pending);
	INIT_LIST_HEAD(&sig->posix_timers);
	seqlock_init(&sig->stats_lock);
	prev_cputime_init(&sig->prev_cputime);

	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sig->real_timer.function = it_real_fn;

	task_lock(current->group_leader);
	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
	task_unlock(current->group_leader);
	posix_cpu_timers_init_group(sig);
	tty_audit_fork(sig);
	sched_autogroup_fork(sig);

	sig->oom_score_adj = current->signal->oom_score_adj;
	sig->oom_score_adj_min = current->signal->oom_score_adj_min;

	sig->has_child_subreaper = current->signal->has_child_subreaper ||
				   current->signal->is_child_subreaper;

	mutex_init(&sig->cred_guard_mutex);

	return 0;
}
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->lock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Ref-count the new filter user, and assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and holding the
	 * sighand lock. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after copying thread
	 * flags and before we took the sighand lock, we have
	 * to manually enable the seccomp thread flag here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
	current->clear_child_tid = tidptr;
	return task_pid_vnr(current);
}
static void rt_mutex_init_task(struct task_struct *p)
{
	raw_spin_lock_init(&p->pi_lock);
#ifdef CONFIG_RT_MUTEXES
	p->pi_waiters = RB_ROOT;
	p->pi_waiters_leftmost = NULL;
	p->pi_blocked_on = NULL;
#endif
}
/*
 * Initialize POSIX timer handling for a single task.
 */
static void posix_cpu_timers_init(struct task_struct *tsk)
{
	tsk->cputime_expires.prof_exp = 0;
	tsk->cputime_expires.virt_exp = 0;
	tsk->cputime_expires.sched_exp = 0;
	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
}

static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
{
	task->pids[type].pid = pid;
}
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace,
					unsigned long tls,
					int node)
{
	int retval;
	struct task_struct *p;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	/*
	 * Siblings of global init remain as zombies on exit since they are
	 * not reaped by their parent (swapper). To solve this and to avoid
	 * multi-rooted process trees, prevent global and container-inits
	 * from creating siblings.
	 */
	if ((clone_flags & CLONE_PARENT) &&
				current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	/*
	 * If the new process will be in a different pid or user namespace
	 * do not allow it to share a thread group with the forking task.
	 */
	if (clone_flags & CLONE_THREAD) {
		if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
		    (task_active_pid_ns(current) !=
				current->nsproxy->pid_ns_for_children))
			return ERR_PTR(-EINVAL);
	}

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current, node);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);

	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	if (atomic_read(&p->real_cred->user->processes) >=
			task_rlimit(p, RLIMIT_NPROC)) {
		if (p->real_cred->user != INIT_USER &&
		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
			goto bad_fork_free;
	}
	current->flags &= ~PF_NPROC_EXCEEDED;

	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	retval = -EAGAIN;
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
	p->flags |= PF_FORKNOEXEC;
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	rcu_copy_process(p);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	init_sigpending(&p->pending);

	p->utime = p->stime = p->gtime = 0;
	p->utimescaled = p->stimescaled = 0;
	prev_cputime_init(&p->prev_cputime);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
	seqcount_init(&p->vtime_seqcount);
	p->vtime_snap = 0;
	p->vtime_snap_whence = VTIME_INACTIVE;
#endif

#if defined(SPLIT_RSS_COUNTING)
	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

	p->default_timer_slack_ns = current->timer_slack_ns;

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	posix_cpu_timers_init(p);

	p->start_time = ktime_get_ns();
	p->real_start_time = ktime_get_boot_ns();
	p->io_context = NULL;
	p->audit_context = NULL;
	threadgroup_change_begin(current);
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup_threadgroup_lock;
	}
#endif
#ifdef CONFIG_CPUSETS
	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
	seqcount_init(&p->mems_allowed_seq);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
	p->hardirqs_enabled = 0;
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif

	p->pagefault_disabled = 0;

#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0; /* no locks held yet */
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL; /* not blocked yet */
#endif
#ifdef CONFIG_BCACHE
	p->sequential_io	= 0;
	p->sequential_io_avg	= 0;
#endif

	/* Perform scheduler related setup. Assign this task to a CPU. */
	retval = sched_fork(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_policy;

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = audit_alloc(p);
	if (retval)
		goto bad_fork_cleanup_perf;
	/* copy all the process information */
	shm_init_task(p);
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_files(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_signal;
	retval = copy_namespaces(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_mm;
	retval = copy_io(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_namespaces;
	retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		pid = alloc_pid(p->nsproxy->pid_ns_for_children);
		if (IS_ERR(pid)) {
			retval = PTR_ERR(pid);
			goto bad_fork_cleanup_thread;
		}
	}

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
	p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		sas_ss_reset(p);

	/*
	 * Syscall tracing and stepping should be turned off in the
	 * child regardless of CLONE_PTRACE.
	 */
	user_disable_single_step(p);
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	/* ok, now we should be set up.. */
	p->pid = pid_nr(pid);
	if (clone_flags & CLONE_THREAD) {
		p->exit_signal = -1;
		p->group_leader = current->group_leader;
		p->tgid = current->tgid;
	} else {
		if (clone_flags & CLONE_PARENT)
			p->exit_signal = current->group_leader->exit_signal;
		else
			p->exit_signal = (clone_flags & CSIGNAL);
		p->group_leader = p;
		p->tgid = p->pid;
	}

	p->nr_dirtied = 0;
	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
	p->dirty_paused_when = 0;

	p->pdeath_signal = 0;
	INIT_LIST_HEAD(&p->thread_group);
	p->task_works = NULL;

	/*
	 * Ensure that the cgroup subsystem policies allow the new process to be
	 * forked. It should be noted that the new process's css_set can be changed
	 * between here and cgroup_post_fork() if an organisation operation is in
	 * progress.
	 */
	retval = cgroup_can_fork(p);
	if (retval)
		goto bad_fork_free_pid;

	/*
	 * Make it visible to the rest of the system, but dont wake it up yet.
	 * Need tasklist lock for parent etc handling!
	 */
	write_lock_irq(&tasklist_lock);

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	/*
	 * Copy seccomp details explicitly here, in case they were changed
	 * before holding sighand lock.
	 */
	copy_seccomp(p);

	/*
	 * Process group and session signals need to be delivered to just the
	 * parent before the fork or both the parent and the child after the
	 * fork. Restart if a signal comes in before we add the new process to
	 * its process group.
	 * A fatal signal pending means that current will exit, so the new
	 * thread can't slip out of an OOM kill (or normal SIGKILL).
	 */
	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_cancel_cgroup;
	}

	if (likely(p->pid)) {
		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

		init_task_pid(p, PIDTYPE_PID, pid);
		if (thread_group_leader(p)) {
			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
			init_task_pid(p, PIDTYPE_SID, task_session(current));

			if (is_child_reaper(pid)) {
				ns_of_pid(pid)->child_reaper = p;
				p->signal->flags |= SIGNAL_UNKILLABLE;
			}

			p->signal->leader_pid = pid;
			p->signal->tty = tty_kref_get(current->signal->tty);
			list_add_tail(&p->sibling, &p->real_parent->children);
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			attach_pid(p, PIDTYPE_PGID);
			attach_pid(p, PIDTYPE_SID);
			__this_cpu_inc(process_counts);
		} else {
			current->signal->nr_threads++;
			atomic_inc(&current->signal->live);
			atomic_inc(&current->signal->sigcnt);
			list_add_tail_rcu(&p->thread_group,
					  &p->group_leader->thread_group);
			list_add_tail_rcu(&p->thread_node,
					  &p->signal->thread_head);
		}
		attach_pid(p, PIDTYPE_PID);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	syscall_tracepoint_update(p);
	write_unlock_irq(&tasklist_lock);

	proc_fork_connector(p);
	cgroup_post_fork(p);
	threadgroup_change_end(current);
	perf_event_fork(p);

	trace_task_newtask(p, clone_flags);
	uprobe_copy_process(p, clone_flags);

1da177e4c
|
1545 |
return p; |
7e47682ea
|
1546 |
bad_fork_cancel_cgroup: |
b53202e63
|
1547 |
cgroup_cancel_fork(p); |
425fb2b4b
|
1548 1549 1550 |
bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); |
0740aa5f6
|
1551 1552 |
bad_fork_cleanup_thread: exit_thread(p); |
fd0928df9
|
1553 |
bad_fork_cleanup_io: |
b69f22920
|
1554 1555 |
if (p->io_context) exit_io_context(p); |
ab516013a
|
1556 |
bad_fork_cleanup_namespaces: |
444f378b2
|
1557 |
exit_task_namespaces(p); |
1da177e4c
|
1558 |
bad_fork_cleanup_mm: |
c9f01245b
|
1559 |
if (p->mm) |
1da177e4c
|
1560 1561 |
mmput(p->mm); bad_fork_cleanup_signal: |
4ab6c0833
|
1562 |
if (!(clone_flags & CLONE_THREAD)) |
1c5354de9
|
1563 |
free_signal_struct(p->signal); |
1da177e4c
|
1564 |
bad_fork_cleanup_sighand: |
a7e5328a0
|
1565 |
__cleanup_sighand(p->sighand); |
1da177e4c
|
1566 1567 1568 1569 1570 1571 1572 1573 |
bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); |
6c72e3501
|
1574 |
bad_fork_cleanup_perf: |
cdd6c482c
|
1575 |
perf_event_free_task(p); |
6c72e3501
|
1576 |
bad_fork_cleanup_policy: |
1da177e4c
|
1577 |
#ifdef CONFIG_NUMA |
f0be3d32b
|
1578 |
mpol_put(p->mempolicy); |
e8604cb43
|
1579 |
bad_fork_cleanup_threadgroup_lock: |
1da177e4c
|
1580 |
#endif |
c9e75f049
|
1581 |
threadgroup_change_end(current); |
35df17c57
|
1582 |
delayacct_tsk_free(p); |
1da177e4c
|
1583 |
bad_fork_cleanup_count: |
d84f4f992
|
1584 |
atomic_dec(&p->cred->user->processes); |
e0e817392
|
1585 |
exit_creds(p); |
1da177e4c
|
1586 1587 |
bad_fork_free: free_task(p); |
fe7d37d1f
|
1588 1589 |
fork_out: return ERR_PTR(retval); |
1da177e4c
|
1590 |
} |
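/*
 * Editor's sketch (not from fork.c): the bad_fork_* labels above form the
 * kernel's idiomatic goto-based unwind ladder -- on failure, control jumps
 * to the label that undoes exactly the steps that had already succeeded,
 * in reverse order. A self-contained illustration of the pattern, with
 * hypothetical resources a and b:
 */
#include <stdlib.h>

struct obj { void *a, *b; };

static int obj_setup(struct obj *o)
{
	o->a = malloc(16);
	if (!o->a)
		goto fail;
	o->b = malloc(16);
	if (!o->b)
		goto undo_a;
	return 0;		/* both resources live */

undo_a:
	free(o->a);		/* unwind only what succeeded */
fail:
	return -1;
}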
f106eee10
|
1591 1592 1593 1594 1595 1596 1597 1598 1599 |
static inline void init_idle_pids(struct pid_link *links) { enum pid_type type; for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { INIT_HLIST_NODE(&links[type].node); /* not really needed */ links[type].pid = &init_struct_pid; } } |
0db0628d9
|
1600 |
struct task_struct *fork_idle(int cpu) |
1da177e4c
|
1601 |
{ |
36c8b5868
|
1602 |
struct task_struct *task; |
725fc629f
|
1603 1604 |
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, cpu_to_node(cpu)); |
f106eee10
|
1605 1606 |
if (!IS_ERR(task)) { init_idle_pids(task->pids); |
753ca4f31
|
1607 |
init_idle(task, cpu); |
f106eee10
|
1608 |
} |
73b9ebfe1
|
1609 |
|
1da177e4c
|
1610 1611 |
return task; } |
1da177e4c
|
1612 1613 1614 1615 1616 1617 |
/* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ |
3033f14ab
|
1618 |
long _do_fork(unsigned long clone_flags, |
1da177e4c
|
1619 |
unsigned long stack_start, |
1da177e4c
|
1620 1621 |
unsigned long stack_size, int __user *parent_tidptr, |
3033f14ab
|
1622 1623 |
int __user *child_tidptr, unsigned long tls) |
1da177e4c
|
1624 1625 1626 |
{ struct task_struct *p; int trace = 0; |
92476d7fc
|
1627 |
long nr; |
1da177e4c
|
1628 |
|
bdff746a3
|
1629 |
/* |
4b9d33e6d
|
1630 1631 1632 1633 |
* Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly * requested, no event is reported; otherwise, report if the event * for the type of forking is enabled. |
09a05394f
|
1634 |
*/ |
e80d6661c
|
1635 |
if (!(clone_flags & CLONE_UNTRACED)) { |
4b9d33e6d
|
1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 |
if (clone_flags & CLONE_VFORK) trace = PTRACE_EVENT_VFORK; else if ((clone_flags & CSIGNAL) != SIGCHLD) trace = PTRACE_EVENT_CLONE; else trace = PTRACE_EVENT_FORK; if (likely(!ptrace_event_enabled(current, trace))) trace = 0; } |
1da177e4c
|
1646 |
|
62e791c1b
|
1647 |
p = copy_process(clone_flags, stack_start, stack_size, |
725fc629f
|
1648 |
child_tidptr, NULL, trace, tls, NUMA_NO_NODE); |
1da177e4c
|
1649 1650 1651 1652 1653 1654 |
/* * Do this prior to waking up the new thread - the thread pointer * might become invalid after that point, if the thread exits quickly. */ if (!IS_ERR(p)) { struct completion vfork; |
4e52365f2
|
1655 |
struct pid *pid; |
1da177e4c
|
1656 |
|
0a16b6075
|
1657 |
trace_sched_process_fork(current, p); |
4e52365f2
|
1658 1659 |
pid = get_task_pid(p, PIDTYPE_PID); nr = pid_vnr(pid); |
30e49c263
|
1660 1661 1662 |
if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); |
a6f5e0637
|
1663 |
|
1da177e4c
|
1664 1665 1666 |
if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); |
d68b46fe1
|
1667 |
get_task_struct(p); |
1da177e4c
|
1668 |
} |
3e51e3edf
|
1669 |
wake_up_new_task(p); |
1da177e4c
|
1670 |
|
4b9d33e6d
|
1671 1672 |
/* forking complete and child started to run, tell ptracer */ if (unlikely(trace)) |
4e52365f2
|
1673 |
ptrace_event_pid(trace, pid); |
09a05394f
|
1674 |
|
1da177e4c
|
1675 |
if (clone_flags & CLONE_VFORK) { |
d68b46fe1
|
1676 |
if (!wait_for_vfork_done(p, &vfork)) |
4e52365f2
|
1677 |
ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); |
1da177e4c
|
1678 |
} |
4e52365f2
|
1679 1680 |
put_pid(pid); |
1da177e4c
|
1681 |
} else { |
92476d7fc
|
1682 |
nr = PTR_ERR(p); |
1da177e4c
|
1683 |
} |
92476d7fc
|
1684 |
return nr; |
1da177e4c
|
1685 |
} |
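/*
 * Editor's sketch (userspace, not kernel code): the CLONE_VFORK handling
 * above is what makes vfork(2) suspend the parent in wait_for_vfork_done()
 * until the child exits or execs. Minimal demonstration:
 */
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = vfork();

	if (pid < 0)
		return 1;
	if (pid == 0)
		_exit(0);	/* the child may only _exit() or exec here */
	/* reached only after the child has exited */
	printf("child %d done; parent resumes\n", (int)pid);
	wait(NULL);
	return 0;
}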
3033f14ab
|
1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 |
#ifndef CONFIG_HAVE_COPY_THREAD_TLS /* For compatibility with architectures that call do_fork directly rather than * using the syscall entry points below. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { return _do_fork(clone_flags, stack_start, stack_size, parent_tidptr, child_tidptr, 0); } #endif |
2aa3a7f86
|
1699 1700 1701 1702 1703 |
/* * Create a kernel thread. */ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { |
3033f14ab
|
1704 1705 |
return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, (unsigned long)arg, NULL, NULL, 0); |
2aa3a7f86
|
1706 |
} |
2aa3a7f86
|
1707 |
|
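/*
 * Editor's sketch (illustrative, not from fork.c): kernel_thread() is a
 * low-level primitive; driver and subsystem code normally goes through the
 * kthread API instead, which adds naming and stop handling on top of it.
 * The my_worker/worker/start_worker names below are hypothetical.
 */
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *worker;

static int my_worker(void *data)
{
	while (!kthread_should_stop())
		msleep(100);	/* periodic work would go here */
	return 0;
}

static int start_worker(void)
{
	worker = kthread_run(my_worker, NULL, "my_worker");
	return PTR_ERR_OR_ZERO(worker);
}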
d2125043a
|
1708 1709 1710 1711 |
#ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) { #ifdef CONFIG_MMU |
3033f14ab
|
1712 |
return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); |
d2125043a
|
1713 1714 |
#else /* cannot be supported in nommu mode */ |
5d59e1827
|
1715 |
return -EINVAL; |
d2125043a
|
1716 1717 1718 1719 1720 1721 1722 |
#endif } #endif #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { |
3033f14ab
|
1723 1724 |
return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL, 0); |
d2125043a
|
1725 1726 1727 1728 1729 1730 1731 |
} #endif #ifdef __ARCH_WANT_SYS_CLONE #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, |
3033f14ab
|
1732 |
unsigned long, tls, |
d2125043a
|
1733 1734 1735 1736 1737 |
int __user *, child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, |
3033f14ab
|
1738 |
unsigned long, tls) |
dfa9771a7
|
1739 1740 1741 1742 1743 |
#elif defined(CONFIG_CLONE_BACKWARDS3) SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, stack_size, int __user *, parent_tidptr, int __user *, child_tidptr, |
3033f14ab
|
1744 |
unsigned long, tls) |
d2125043a
|
1745 1746 1747 1748 |
#else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, int __user *, child_tidptr, |
3033f14ab
|
1749 |
unsigned long, tls) |
d2125043a
|
1750 1751 |
#endif { |
3033f14ab
|
1752 |
return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); |
d2125043a
|
1753 1754 |
} #endif |
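/*
 * Editor's sketch (userspace, not kernel code): the CONFIG_CLONE_BACKWARDS*
 * variants above exist because architectures disagree on clone()'s argument
 * order; the glibc wrapper hides that. A fork-like clone with an explicit
 * child stack:
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

#define STACK_SIZE (1024 * 1024)

static int child_fn(void *arg)
{
	printf("child pid=%d\n", (int)getpid());
	return 0;
}

int main(void)
{
	char *stack = malloc(STACK_SIZE);
	pid_t pid;

	if (!stack)
		return 1;
	/* the stack grows down on most architectures: pass the top */
	pid = clone(child_fn, stack + STACK_SIZE, SIGCHLD, NULL);
	if (pid < 0)
		return 1;
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}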
5fd63b308
|
1755 1756 1757 |
#ifndef ARCH_MIN_MMSTRUCT_ALIGN #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif |
51cc50685
|
1758 |
static void sighand_ctor(void *data) |
aa1757f90
|
1759 1760 |
{ struct sighand_struct *sighand = data; |
a35afb830
|
1761 |
spin_lock_init(&sighand->siglock); |
b8fceee17
|
1762 |
init_waitqueue_head(&sighand->signalfd_wqh); |
aa1757f90
|
1763 |
} |
1da177e4c
|
1764 1765 1766 1767 |
void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, |
2dff44052
|
1768 |
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| |
5d097056c
|
1769 |
SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); |
1da177e4c
|
1770 1771 |
signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, |
5d097056c
|
1772 1773 |
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); |
20c2df83d
|
1774 |
files_cachep = kmem_cache_create("files_cache", |
1da177e4c
|
1775 |
sizeof(struct files_struct), 0, |
5d097056c
|
1776 1777 |
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); |
20c2df83d
|
1778 |
fs_cachep = kmem_cache_create("fs_cache", |
1da177e4c
|
1779 |
sizeof(struct fs_struct), 0, |
5d097056c
|
1780 1781 |
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); |
6345d24da
|
1782 1783 1784 1785 1786 1787 1788 |
/* * FIXME! The "sizeof(struct mm_struct)" currently includes the * whole struct cpumask for the OFFSTACK case. We could change * this to *only* allocate as much of it as required by the * maximum number of CPUs we can ever have. The cpumask_allocation * is at the end of the structure, exactly for that reason. */ |
1da177e4c
|
1789 |
mm_cachep = kmem_cache_create("mm_struct", |
5fd63b308
|
1790 |
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
5d097056c
|
1791 1792 1793 |
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); |
8feae1311
|
1794 |
mmap_init(); |
665771939
|
1795 |
nsproxy_cache_init(); |
1da177e4c
|
1796 |
} |
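/*
 * Editor's sketch (illustrative, not from fork.c): creating a slab cache
 * with a constructor, in the same style as sighand_cachep above. The
 * my_obj/my_cachep/my_ctor names are hypothetical.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_obj {
	spinlock_t lock;
	int value;
};

static struct kmem_cache *my_cachep;

static void my_ctor(void *data)
{
	struct my_obj *o = data;

	spin_lock_init(&o->lock);	/* runs once per slab object */
}

static int __init my_cache_init(void)
{
	my_cachep = kmem_cache_create("my_cache", sizeof(struct my_obj),
				      0, SLAB_HWCACHE_ALIGN, my_ctor);
	return my_cachep ? 0 : -ENOMEM;
}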
cf2e340f4
|
1797 |
|
cf2e340f4
|
1798 |
/* |
9bfb23fc4
|
1799 |
* Check constraints on flags passed to the unshare system call. |
cf2e340f4
|
1800 |
*/ |
9bfb23fc4
|
1801 |
static int check_unshare_flags(unsigned long unshare_flags) |
cf2e340f4
|
1802 |
{ |
9bfb23fc4
|
1803 1804 |
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
50804fe37
|
1805 |
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| |
a79a908fd
|
1806 |
CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP)) |
9bfb23fc4
|
1807 |
return -EINVAL; |
cf2e340f4
|
1808 |
/* |
12c641ab8
|
1809 1810 1811 1812 |
* Not implemented, but pretend it works if there is nothing * to unshare. Note that unsharing the address space or the * signal handlers also needs to unshare the signal queues (aka * CLONE_THREAD). |
cf2e340f4
|
1813 |
*/ |
9bfb23fc4
|
1814 |
if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { |
12c641ab8
|
1815 1816 1817 1818 1819 1820 1821 1822 1823 |
if (!thread_group_empty(current)) return -EINVAL; } if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { if (atomic_read(¤t->sighand->count) > 1) return -EINVAL; } if (unshare_flags & CLONE_VM) { if (!current_is_single_threaded()) |
9bfb23fc4
|
1824 1825 |
return -EINVAL; } |
cf2e340f4
|
1826 1827 1828 1829 1830 |
return 0; } /* |
99d1419d9
|
1831 |
* Unshare the filesystem structure if it is being shared |
cf2e340f4
|
1832 1833 1834 1835 |
*/ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; |
498052bba
|
1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 |
if (!(unshare_flags & CLONE_FS) || !fs) return 0; /* don't need lock here; in the worst case we'll do a useless copy */ if (fs->users == 1) return 0; *new_fsp = copy_fs_struct(fs); if (!*new_fsp) return -ENOMEM; |
cf2e340f4
|
1846 1847 1848 1849 1850 |
return 0; } /* |
a016f3389
|
1851 |
* Unshare file descriptor table if it is being shared |
cf2e340f4
|
1852 1853 1854 1855 |
*/ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) { struct files_struct *fd = current->files; |
a016f3389
|
1856 |
int error = 0; |
cf2e340f4
|
1857 1858 |
if ((unshare_flags & CLONE_FILES) && |
a016f3389
|
1859 1860 1861 1862 1863 |
(fd && atomic_read(&fd->count) > 1)) { *new_fdp = dup_fd(fd, &error); if (!*new_fdp) return error; } |
cf2e340f4
|
1864 1865 1866 1867 1868 |
return 0; } /* |
cf2e340f4
|
1869 1870 1871 1872 1873 1874 1875 |
* unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ |
6559eed8c
|
1876 |
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) |
cf2e340f4
|
1877 |
{ |
cf2e340f4
|
1878 |
struct fs_struct *fs, *new_fs = NULL; |
cf2e340f4
|
1879 |
struct files_struct *fd, *new_fd = NULL; |
b2e0d9870
|
1880 |
struct cred *new_cred = NULL; |
cf7b708c8
|
1881 |
struct nsproxy *new_nsproxy = NULL; |
9edff4ab1
|
1882 |
int do_sysvsem = 0; |
9bfb23fc4
|
1883 |
int err; |
cf2e340f4
|
1884 |
|
50804fe37
|
1885 |
/* |
faf00da54
|
1886 1887 |
* If unsharing a user namespace, we must also unshare the thread group * and the filesystem root and working directories. |
b2e0d9870
|
1888 1889 |
*/ if (unshare_flags & CLONE_NEWUSER) |
e66eded83
|
1890 |
unshare_flags |= CLONE_THREAD | CLONE_FS; |
b2e0d9870
|
1891 |
/* |
50804fe37
|
1892 1893 1894 1895 |
* If unsharing the VM, we must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; |
6013f67fc
|
1896 |
/* |
12c641ab8
|
1897 1898 1899 1900 1901 |
* If unsharing signal handlers, we must also unshare the signal queues. */ if (unshare_flags & CLONE_SIGHAND) unshare_flags |= CLONE_THREAD; /* |
9bfb23fc4
|
1902 1903 1904 1905 |
* If unsharing a mount namespace, we must also unshare filesystem information. */ if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; |
50804fe37
|
1906 1907 1908 1909 |
err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; |
9bfb23fc4
|
1910 |
/* |
6013f67fc
|
1911 1912 1913 1914 1915 |
* CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) |
9edff4ab1
|
1916 |
do_sysvsem = 1; |
fb0a685cb
|
1917 1918 |
err = unshare_fs(unshare_flags, &new_fs); if (err) |
9bfb23fc4
|
1919 |
goto bad_unshare_out; |
fb0a685cb
|
1920 1921 |
err = unshare_fd(unshare_flags, &new_fd); if (err) |
9bfb23fc4
|
1922 |
goto bad_unshare_cleanup_fs; |
b2e0d9870
|
1923 |
err = unshare_userns(unshare_flags, &new_cred); |
fb0a685cb
|
1924 |
if (err) |
9edff4ab1
|
1925 |
goto bad_unshare_cleanup_fd; |
b2e0d9870
|
1926 1927 1928 1929 |
err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_cred, new_fs); if (err) goto bad_unshare_cleanup_cred; |
c0b2fc316
|
1930 |
|
b2e0d9870
|
1931 |
if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { |
9edff4ab1
|
1932 1933 1934 1935 1936 1937 |
if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). */ exit_sem(current); } |
ab602f799
|
1938 1939 1940 1941 1942 |
if (unshare_flags & CLONE_NEWIPC) { /* Orphan segments in old ns (see sem above). */ exit_shm(current); shm_init_task(current); } |
ab516013a
|
1943 |
|
6f977e6b2
|
1944 |
if (new_nsproxy) |
cf7b708c8
|
1945 |
switch_task_namespaces(current, new_nsproxy); |
cf2e340f4
|
1946 |
|
cf7b708c8
|
1947 |
task_lock(current); |
cf2e340f4
|
1948 1949 |
if (new_fs) { fs = current->fs; |
2a4419b5b
|
1950 |
spin_lock(&fs->lock); |
cf2e340f4
|
1951 |
current->fs = new_fs; |
498052bba
|
1952 1953 1954 1955 |
if (--fs->users) new_fs = NULL; else new_fs = fs; |
2a4419b5b
|
1956 |
spin_unlock(&fs->lock); |
cf2e340f4
|
1957 |
} |
cf2e340f4
|
1958 1959 1960 1961 1962 1963 1964 |
if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); |
b2e0d9870
|
1965 1966 1967 1968 1969 1970 |
if (new_cred) { /* Install the new user namespace */ commit_creds(new_cred); new_cred = NULL; } |
cf2e340f4
|
1971 |
} |
b2e0d9870
|
1972 1973 1974 |
bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); |
cf2e340f4
|
1975 1976 1977 |
bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); |
cf2e340f4
|
1978 1979 |
bad_unshare_cleanup_fs: if (new_fs) |
498052bba
|
1980 |
free_fs_struct(new_fs); |
cf2e340f4
|
1981 |
|
cf2e340f4
|
1982 1983 1984 |
bad_unshare_out: return err; } |
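/*
 * Editor's sketch (userspace, not kernel code): unshare(CLONE_NEWNS) gives
 * the caller a private mount namespace; per the flag fixups above it
 * implicitly unshares fs_struct as well. Needs CAP_SYS_ADMIN.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	if (unshare(CLONE_NEWNS) != 0) {
		perror("unshare");
		return 1;
	}
	/* mounts made from here on are invisible outside this namespace */
	return 0;
}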
3b1253880
|
1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 |
/* * Helper to unshare the files of the current task. * We don't want to expose copy_files internals to * the exec layer of the kernel. */ int unshare_files(struct files_struct **displaced) { struct task_struct *task = current; |
50704516f
|
1995 |
struct files_struct *copy = NULL; |
3b1253880
|
1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 |
int error; error = unshare_fd(CLONE_FILES, ©); if (error || !copy) { *displaced = NULL; return error; } *displaced = task->files; task_lock(task); task->files = copy; task_unlock(task); return 0; } |
16db3d3f1
|
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 |
int sysctl_max_threads(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; int threads = max_threads; int min = MIN_THREADS; int max = MAX_THREADS; t = *table; t.data = &threads; t.extra1 = &min; t.extra2 = &max; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (ret || !write) return ret; set_max_threads(threads); return 0; } |
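/*
 * Editor's sketch (userspace, not kernel code): sysctl_max_threads() above
 * backs /proc/sys/kernel/threads-max; proc_dointvec_minmax() rejects writes
 * outside [MIN_THREADS, MAX_THREADS]. Reading the current limit:
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/threads-max", "r");
	int threads;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &threads) == 1)
		printf("threads-max = %d\n", threads);
	fclose(f);
	return 0;
}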