/*
 *  linux/kernel/fork.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'fork.c' contains the help-routines for the 'fork' system call
 * (see also entry.S and others).
 * Fork is rather simple, once you get the hang of it, but the memory
 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
 */

#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/tracehook.h>
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/kthread.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include <trace/events/sched.h>
/*
 * Protected counters by write_lock_irq(&tasklist_lock)
 */
unsigned long total_forks;	/* Handle normal Linux uptimes. */
int nr_threads;			/* The idle threads do not count.. */

int max_threads;		/* tunable limit on nr_threads */

DEFINE_PER_CPU(unsigned long, process_counts) = 0;

__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */

#ifdef CONFIG_PROVE_RCU
int lockdep_tasklist_lock_is_held(void)
{
	return lockdep_is_held(&tasklist_lock);
}
EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
#endif /* #ifdef CONFIG_PROVE_RCU */
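
/* Report the number of processes by summing the per-CPU fork counts. */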
int nr_processes(void)
{
	int cpu;
	int total = 0;

	for_each_possible_cpu(cpu)
		total += per_cpu(process_counts, cpu);

	return total;
}

#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
# define alloc_task_struct_node(node)		\
		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
# define free_task_struct(tsk)			\
		kmem_cache_free(task_struct_cachep, (tsk))
static struct kmem_cache *task_struct_cachep;
#endif

#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
						  int node)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
#else
	gfp_t mask = GFP_KERNEL;
#endif
	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);

	return page ? page_address(page) : NULL;
}

static inline void free_thread_info(struct thread_info *ti)
{
	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;

/* SLAB cache for sighand_struct structures (tsk->sighand) */
struct kmem_cache *sighand_cachep;

/* SLAB cache for files_struct structures (tsk->files) */
struct kmem_cache *files_cachep;

/* SLAB cache for fs_struct structures (tsk->fs) */
struct kmem_cache *fs_cachep;

/* SLAB cache for vm_area_struct structures */
struct kmem_cache *vm_area_cachep;

/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;

static void account_kernel_stack(struct thread_info *ti, int account)
{
	struct zone *zone = page_zone(virt_to_page(ti));

	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
}
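
/* Release the kernel-stack accounting, the thread_info and the task_struct. */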
void free_task(struct task_struct *tsk)
{
	prop_local_destroy_single(&tsk->dirties);
	account_kernel_stack(tsk->stack, -1);
	free_thread_info(tsk->stack);
	rt_mutex_debug_task_free(tsk);
	ftrace_graph_exit_task(tsk);
	free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
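
/* The signal_struct is freed once the last sigcnt reference is dropped. */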
static inline void free_signal_struct(struct signal_struct *sig)
{
	taskstats_tgid_free(sig);
	sched_autogroup_exit(sig);
	kmem_cache_free(signal_cachep, sig);
}

static inline void put_signal_struct(struct signal_struct *sig)
{
	if (atomic_dec_and_test(&sig->sigcnt))
		free_signal_struct(sig);
}
void __put_task_struct(struct task_struct *tsk)
{
	WARN_ON(!tsk->exit_state);
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);

	exit_creds(tsk);
	delayacct_tsk_free(tsk);
	put_signal_struct(tsk->signal);

	if (!profile_handoff_task(tsk))
		free_task(tsk);
}
EXPORT_SYMBOL_GPL(__put_task_struct);

/*
 * macro override instead of weak attribute alias, to workaround
 * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
 */
#ifndef arch_task_cache_init
#define arch_task_cache_init()
#endif

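/*
 * Early boot setup: create the task_struct cache and size max_threads so
 * that thread structures can consume at most half of memory.
 */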
void __init fork_init(unsigned long mempages)
{
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
#endif
	/* create a slab on which task_structs can be allocated */
	task_struct_cachep =
		kmem_cache_create("task_struct", sizeof(struct task_struct),
			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif

	/* do the arch specific task caches init */
	arch_task_cache_init();

	/*
	 * The default maximum number of threads is set to a safe
	 * value: the thread structures can take up at most half
	 * of memory.
	 */
	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

	/*
	 * we need to allow at least 20 threads to boot a system
	 */
	if (max_threads < 20)
		max_threads = 20;

	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
	init_task.signal->rlim[RLIMIT_SIGPENDING] =
		init_task.signal->rlim[RLIMIT_NPROC];
}

int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
					       struct task_struct *src)
{
	*dst = *src;
	return 0;
}
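
/*
 * Allocate a new task_struct and kernel stack and copy the parent's contents
 * into them; the caller performs the rest of the child setup.
 */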
static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *tsk;
	struct thread_info *ti;
	unsigned long *stackend;
	int node = tsk_fork_get_node(orig);
	int err;

	prepare_to_copy(orig);

	tsk = alloc_task_struct_node(node);
	if (!tsk)
		return NULL;

	ti = alloc_thread_info_node(tsk, node);
	if (!ti) {
		free_task_struct(tsk);
		return NULL;
	}

	err = arch_dup_task_struct(tsk, orig);
	if (err)
		goto out;

	tsk->stack = ti;

	err = prop_local_init_single(&tsk->dirties);
	if (err)
		goto out;

	setup_thread_stack(tsk, orig);
	clear_user_return_notifier(tsk);
	clear_tsk_need_resched(tsk);
	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;	/* for overflow detection */

#ifdef CONFIG_CC_STACKPROTECTOR
	tsk->stack_canary = get_random_int();
#endif

	/* One for us, one for whoever does the "release_task()" (usually parent) */
	atomic_set(&tsk->usage, 2);
	atomic_set(&tsk->fs_excl, 0);
#ifdef CONFIG_BLK_DEV_IO_TRACE
	tsk->btrace_seq = 0;
#endif
	tsk->splice_pipe = NULL;

	account_kernel_stack(ti, 1);

	return tsk;

out:
	free_thread_info(ti);
	free_task_struct(tsk);
	return NULL;
}

#ifdef CONFIG_MMU
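/*
 * Duplicate the parent's VMAs into the child mm: copy each vm_area_struct,
 * link it into the new mm's list and rbtree, and copy the page tables.
 */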
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
	struct rb_node **rb_link, *rb_parent;
	int retval;
	unsigned long charge;
	struct mempolicy *pol;

	down_write(&oldmm->mmap_sem);
	flush_cache_dup_mm(oldmm);
	/*
	 * Not linked in yet - no deadlock potential:
	 */
	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

	mm->locked_vm = 0;
	mm->mmap = NULL;
	mm->mmap_cache = NULL;
	mm->free_area_cache = oldmm->mmap_base;
	mm->cached_hole_size = ~0UL;
	mm->map_count = 0;
	cpumask_clear(mm_cpumask(mm));
	mm->mm_rb = RB_ROOT;
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	pprev = &mm->mmap;
	retval = ksm_fork(mm, oldmm);
	if (retval)
		goto out;
	retval = khugepaged_fork(mm, oldmm);
	if (retval)
		goto out;

	prev = NULL;
	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		struct file *file;

		if (mpnt->vm_flags & VM_DONTCOPY) {
			long pages = vma_pages(mpnt);
			mm->total_vm -= pages;
			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
								-pages);
			continue;
		}
		charge = 0;
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
			if (security_vm_enough_memory(len))
				goto fail_nomem;
			charge = len;
		}
		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		INIT_LIST_HEAD(&tmp->anon_vma_chain);
		pol = mpol_dup(vma_policy(mpnt));
		retval = PTR_ERR(pol);
		if (IS_ERR(pol))
			goto fail_nomem_policy;
		vma_set_policy(tmp, pol);
		tmp->vm_mm = mm;
		if (anon_vma_fork(tmp, mpnt))
			goto fail_nomem_anon_vma_fork;
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_next = tmp->vm_prev = NULL;
		file = tmp->vm_file;
		if (file) {
			struct inode *inode = file->f_path.dentry->d_inode;
			struct address_space *mapping = file->f_mapping;

			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);
			spin_lock(&mapping->i_mmap_lock);
			if (tmp->vm_flags & VM_SHARED)
				mapping->i_mmap_writable++;
			tmp->vm_truncate_count = mpnt->vm_truncate_count;
			flush_dcache_mmap_lock(mapping);
			/* insert tmp into the share list, just after mpnt */
			vma_prio_tree_add(tmp, mpnt);
			flush_dcache_mmap_unlock(mapping);
			spin_unlock(&mapping->i_mmap_lock);
		}

		/*
		 * Clear hugetlb-related page reserves for children. This only
		 * affects MAP_PRIVATE mappings. Faults generated by the child
		 * are not guaranteed to succeed, even if read-only
		 */
		if (is_vm_hugetlb_page(tmp))
			reset_vma_resv_huge_pages(tmp);

		/*
		 * Link in the new vma and copy the page table entries.
		 */
		*pprev = tmp;
		pprev = &tmp->vm_next;
		tmp->vm_prev = prev;
		prev = tmp;

		__vma_link_rb(mm, tmp, rb_link, rb_parent);
		rb_link = &tmp->vm_rb.rb_right;
		rb_parent = &tmp->vm_rb;

		mm->map_count++;
		retval = copy_page_range(mm, oldmm, mpnt);

		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
			goto out;
	}
	/* a new mm has just been created */
	arch_dup_mmap(oldmm, mm);
	retval = 0;
out:
	up_write(&mm->mmap_sem);
	flush_tlb_mm(oldmm);
	up_write(&oldmm->mmap_sem);
	return retval;
fail_nomem_anon_vma_fork:
	mpol_put(pol);
fail_nomem_policy:
	kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
	retval = -ENOMEM;
	vm_unacct_memory(charge);
	goto out;
}

static inline int mm_alloc_pgd(struct mm_struct * mm)
{
	mm->pgd = pgd_alloc(mm);
	if (unlikely(!mm->pgd))
		return -ENOMEM;
	return 0;
}

static inline void mm_free_pgd(struct mm_struct * mm)
{
	pgd_free(mm, mm->pgd);
}
#else
#define dup_mmap(mm, oldmm)	(0)
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);

#define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))

static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;

static int __init coredump_filter_setup(char *s)
{
	default_dump_filter =
		(simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
		MMF_DUMP_FILTER_MASK;
	return 1;
}

__setup("coredump_filter=", coredump_filter_setup);

#include <linux/init_task.h>

static void mm_init_aio(struct mm_struct *mm)
{
#ifdef CONFIG_AIO
	spin_lock_init(&mm->ioctx_lock);
	INIT_HLIST_HEAD(&mm->ioctx_list);
#endif
}
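
/*
 * Initialize a freshly allocated (and zeroed) mm_struct: reference counts,
 * locks, flags and the page directory.
 */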
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
{
	atomic_set(&mm->mm_users, 1);
	atomic_set(&mm->mm_count, 1);
	init_rwsem(&mm->mmap_sem);
	INIT_LIST_HEAD(&mm->mmlist);
	mm->flags = (current->mm) ?
		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
	mm->core_state = NULL;
	mm->nr_ptes = 0;
	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
	spin_lock_init(&mm->page_table_lock);
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	mm_init_aio(mm);
	mm_init_owner(mm, p);
	atomic_set(&mm->oom_disable_count, 0);

	if (likely(!mm_alloc_pgd(mm))) {
		mm->def_flags = 0;
		mmu_notifier_mm_init(mm);
		return mm;
	}

	free_mm(mm);
	return NULL;
}

/*
 * Allocate and initialize an mm_struct.
 */
struct mm_struct * mm_alloc(void)
{
	struct mm_struct * mm;

	mm = allocate_mm();
	if (mm) {
		memset(mm, 0, sizeof(*mm));
		mm = mm_init(mm, current);
	}
	return mm;
}

/*
 * Called when the last reference to the mm
 * is dropped: either by a lazy thread or by
 * mmput. Free the page directory and the mm.
 */
void __mmdrop(struct mm_struct *mm)
{
	BUG_ON(mm == &init_mm);
	mm_free_pgd(mm);
	destroy_context(mm);
	mmu_notifier_mm_destroy(mm);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	VM_BUG_ON(mm->pmd_huge_pte);
#endif
	free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	might_sleep();

	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		ksm_exit(mm);
		khugepaged_exit(mm); /* must run before exit_mmap */
		exit_mmap(mm);
		set_mm_exe_file(mm, NULL);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		put_swap_token(mm);
		if (mm->binfmt)
			module_put(mm->binfmt->module);
		mmdrop(mm);
	}
}
EXPORT_SYMBOL_GPL(mmput);

/**
 * get_task_mm - acquire a reference to the task's mm
 *
 * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
 * this kernel workthread has transiently adopted a user mm with use_mm,
 * to do its AIO) is not set and if so returns a reference to it, after
 * bumping up the use count.  User must release the mm via mmput()
 * after use.  Typically used by /proc and ptrace.
 */
struct mm_struct *get_task_mm(struct task_struct *task)
{
	struct mm_struct *mm;

	task_lock(task);
	mm = task->mm;
	if (mm) {
		if (task->flags & PF_KTHREAD)
			mm = NULL;
		else
			atomic_inc(&mm->mm_users);
	}
	task_unlock(task);
	return mm;
}
EXPORT_SYMBOL_GPL(get_task_mm);

/* Please note the differences between mmput and mm_release.
 * mmput is called whenever we stop holding onto a mm_struct,
 * error success whatever.
 *
 * mm_release is called after a mm_struct has been removed
 * from the current process.
 *
 * This difference is important for error handling, when we
 * only half set up a mm_struct for a new process and need to restore
 * the old one.  Because we mmput the new mm_struct before
 * restoring the old one. . .
 * Eric Biederman 10 January 1998
 */
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	struct completion *vfork_done = tsk->vfork_done;

	/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
		tsk->robust_list = NULL;
	}
#ifdef CONFIG_COMPAT
	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
		tsk->compat_robust_list = NULL;
	}
#endif
	if (unlikely(!list_empty(&tsk->pi_state_list)))
		exit_pi_state_list(tsk);
#endif

	/* Get rid of any cached register state */
	deactivate_mm(tsk, mm);

	/* notify parent sleeping on vfork() */
	if (vfork_done) {
		tsk->vfork_done = NULL;
		complete(vfork_done);
	}

	/*
	 * If we're exiting normally, clear a user-space tid field if
	 * requested.  We leave this alone when dying by signal, to leave
	 * the value intact in a core dump, and to save the unnecessary
	 * trouble otherwise.  Userland only wants this done for a sys_exit.
	 */
	if (tsk->clear_child_tid) {
		if (!(tsk->flags & PF_SIGNALED) &&
		    atomic_read(&mm->mm_users) > 1) {
			/*
			 * We don't check the error code - if userspace has
			 * not set up a proper pointer then tough luck.
			 */
			put_user(0, tsk->clear_child_tid);
			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
					1, NULL, NULL, 0);
		}
		tsk->clear_child_tid = NULL;
	}
}
/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
 */
struct mm_struct *dup_mm(struct task_struct *tsk)
{
	struct mm_struct *mm, *oldmm = current->mm;
	int err;

	if (!oldmm)
		return NULL;

	mm = allocate_mm();
	if (!mm)
		goto fail_nomem;

	memcpy(mm, oldmm, sizeof(*mm));
	/* Initializing for Swap token stuff */
	mm->token_priority = 0;
	mm->last_interval = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	mm->pmd_huge_pte = NULL;
#endif

	if (!mm_init(mm, tsk))
		goto fail_nomem;

	if (init_new_context(tsk, mm))
		goto fail_nocontext;

	dup_mm_exe_file(oldmm, mm);

	err = dup_mmap(mm, oldmm);
	if (err)
		goto free_pt;

	mm->hiwater_rss = get_mm_rss(mm);
	mm->hiwater_vm = mm->total_vm;

	if (mm->binfmt && !try_module_get(mm->binfmt->module))
		goto free_pt;

	return mm;

free_pt:
	/* don't put binfmt in mmput, we haven't got module yet */
	mm->binfmt = NULL;
	mmput(mm);

fail_nomem:
	return NULL;

fail_nocontext:
	/*
	 * If init_new_context() failed, we cannot use mmput() to free the mm
	 * because it calls destroy_context()
	 */
	mm_free_pgd(mm);
	free_mm(mm);
	return NULL;
}
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
	struct mm_struct * mm, *oldmm;
	int retval;

	tsk->min_flt = tsk->maj_flt = 0;
	tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
#endif

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * We need to steal a active VM for that..
	 */
	oldmm = current->mm;
	if (!oldmm)
		return 0;

	if (clone_flags & CLONE_VM) {
		atomic_inc(&oldmm->mm_users);
		mm = oldmm;
		goto good_mm;
	}

	retval = -ENOMEM;
	mm = dup_mm(tsk);
	if (!mm)
		goto fail_nomem;

good_mm:
	/* Initializing for Swap token stuff */
	mm->token_priority = 0;
	mm->last_interval = 0;
	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
		atomic_inc(&mm->oom_disable_count);

	tsk->mm = mm;
	tsk->active_mm = mm;
	return 0;

fail_nomem:
	return retval;
}
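
/*
 * Share or copy the parent's fs_struct (root, pwd) depending on CLONE_FS.
 */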
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
{
	struct fs_struct *fs = current->fs;
	if (clone_flags & CLONE_FS) {
		/* tsk->fs is already what we want */
		spin_lock(&fs->lock);
		if (fs->in_exec) {
			spin_unlock(&fs->lock);
			return -EAGAIN;
		}
		fs->users++;
		spin_unlock(&fs->lock);
		return 0;
	}
	tsk->fs = copy_fs_struct(fs);
	if (!tsk->fs)
		return -ENOMEM;
	return 0;
}
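
/*
 * Share or duplicate the parent's file descriptor table depending on
 * CLONE_FILES.
 */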
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
	struct files_struct *oldf, *newf;
	int error = 0;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
	}

	newf = dup_fd(oldf, &error);
	if (!newf)
		goto out;

	tsk->files = newf;
	error = 0;
out:
	return error;
}
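
/*
 * Set up the child's io_context: CLONE_IO shares the parent's context,
 * otherwise the parent's ioprio is inherited into a new one.
 */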
static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
#ifdef CONFIG_BLOCK
	struct io_context *ioc = current->io_context;

	if (!ioc)
		return 0;
	/*
	 * Share io context with parent, if CLONE_IO is set
	 */
	if (clone_flags & CLONE_IO) {
		tsk->io_context = ioc_task_link(ioc);
		if (unlikely(!tsk->io_context))
			return -ENOMEM;
	} else if (ioprio_valid(ioc->ioprio)) {
		tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
		if (unlikely(!tsk->io_context))
			return -ENOMEM;
		tsk->io_context->ioprio = ioc->ioprio;
	}
#endif
	return 0;
}
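
/*
 * Share or copy the parent's signal handler table depending on CLONE_SIGHAND.
 */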
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sighand_struct *sig;

	if (clone_flags & CLONE_SIGHAND) {
		atomic_inc(&current->sighand->count);
		return 0;
	}
	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
	rcu_assign_pointer(tsk->sighand, sig);
	if (!sig)
		return -ENOMEM;
	atomic_set(&sig->count, 1);
	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
	return 0;
}

void __cleanup_sighand(struct sighand_struct *sighand)
{
	if (atomic_dec_and_test(&sighand->count))
		kmem_cache_free(sighand_cachep, sighand);
}
/*
 * Initialize POSIX timer handling for a thread group.
 */
static void posix_cpu_timers_init_group(struct signal_struct *sig)
{
	unsigned long cpu_limit;

	/* Thread group counters. */
	thread_group_cputime_init(sig);

	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
	if (cpu_limit != RLIM_INFINITY) {
		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
		sig->cputimer.running = 1;
	}

	/* The timer lists. */
	INIT_LIST_HEAD(&sig->cpu_timers[0]);
	INIT_LIST_HEAD(&sig->cpu_timers[1]);
	INIT_LIST_HEAD(&sig->cpu_timers[2]);
}
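
/*
 * Allocate and initialize a new signal_struct for the child, unless it joins
 * the parent's thread group (CLONE_THREAD).
 */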
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
	struct signal_struct *sig;

	if (clone_flags & CLONE_THREAD)
		return 0;

	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
	tsk->signal = sig;
	if (!sig)
		return -ENOMEM;

	sig->nr_threads = 1;
	atomic_set(&sig->live, 1);
	atomic_set(&sig->sigcnt, 1);
	init_waitqueue_head(&sig->wait_chldexit);
	if (clone_flags & CLONE_NEWPID)
		sig->flags |= SIGNAL_UNKILLABLE;
	sig->curr_target = tsk;
	init_sigpending(&sig->shared_pending);
	INIT_LIST_HEAD(&sig->posix_timers);

	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sig->real_timer.function = it_real_fn;

	task_lock(current->group_leader);
	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
	task_unlock(current->group_leader);

	posix_cpu_timers_init_group(sig);

	tty_audit_fork(sig);
	sched_autogroup_fork(sig);

	sig->oom_adj = current->signal->oom_adj;
	sig->oom_score_adj = current->signal->oom_score_adj;
	sig->oom_score_adj_min = current->signal->oom_score_adj_min;

	mutex_init(&sig->cred_guard_mutex);

	return 0;
}
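
/* Set up the per-process flags for the freshly forked child. */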
static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
	unsigned long new_flags = p->flags;

	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
	new_flags |= PF_FORKNOEXEC;
	new_flags |= PF_STARTING;
	p->flags = new_flags;
	clear_freeze_flag(p);
}
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
	current->clear_child_tid = tidptr;

	return task_pid_vnr(current);
}
static void rt_mutex_init_task(struct task_struct *p)
{
	raw_spin_lock_init(&p->pi_lock);
#ifdef CONFIG_RT_MUTEXES
	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
	p->pi_blocked_on = NULL;
#endif
}
#ifdef CONFIG_MM_OWNER
void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
	mm->owner = p;
}
#endif /* CONFIG_MM_OWNER */

/*
 * Initialize POSIX timer handling for a single task.
 */
static void posix_cpu_timers_init(struct task_struct *tsk)
{
	tsk->cputime_expires.prof_exp = cputime_zero;
	tsk->cputime_expires.virt_exp = cputime_zero;
	tsk->cputime_expires.sched_exp = 0;
	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
}

/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					struct pt_regs *regs,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
	int retval;
	struct task_struct *p;
	int cgroup_callbacks_done = 0;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	/*
	 * Siblings of global init remain as zombies on exit since they are
	 * not reaped by their parent (swapper). To solve this and to avoid
	 * multi-rooted process trees, prevent global and container-inits
	 * from creating siblings.
	 */
	if ((clone_flags & CLONE_PARENT) &&
				current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);

	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	if (atomic_read(&p->real_cred->user->processes) >=
			task_rlimit(p, RLIMIT_NPROC)) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
		    p->real_cred->user != INIT_USER)
			goto bad_fork_free;
	}

	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	retval = -EAGAIN;
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;
	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;

	p->did_exec = 0;
	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	rcu_copy_process(p);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	init_sigpending(&p->pending);

	p->utime = cputime_zero;
	p->stime = cputime_zero;
	p->gtime = cputime_zero;
	p->utimescaled = cputime_zero;
	p->stimescaled = cputime_zero;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
	p->prev_utime = cputime_zero;
	p->prev_stime = cputime_zero;
#endif
#if defined(SPLIT_RSS_COUNTING)
	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

	p->default_timer_slack_ns = current->timer_slack_ns;

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	posix_cpu_timers_init(p);

	p->lock_depth = -1;		/* -1 = no lock */
	do_posix_clock_monotonic_gettime(&p->start_time);
	p->real_start_time = p->start_time;
	monotonic_to_bootbased(&p->real_start_time);
	p->io_context = NULL;
	p->audit_context = NULL;
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup_cgroup;
	}
	mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	p->hardirqs_enabled = 1;
#else
	p->hardirqs_enabled = 0;
#endif
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif
#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0; /* no locks held yet */
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL; /* not blocked yet */
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
	p->memcg_batch.do_batch = 0;
	p->memcg_batch.memcg = NULL;
#endif

	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p, clone_flags);

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;

	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_policy;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_namespaces(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	if ((retval = copy_io(clone_flags, p)))
		goto bad_fork_cleanup_namespaces;
	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		retval = -ENOMEM;
		pid = alloc_pid(p->nsproxy->pid_ns);
		if (!pid)
			goto bad_fork_cleanup_io;
	}

	p->pid = pid_nr(pid);
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	if (current->nsproxy != p->nsproxy) {
		retval = ns_cgroup_clone(p, pid);
		if (retval)
			goto bad_fork_free_pid;
	}

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
#ifdef CONFIG_BLOCK
	p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		p->sas_ss_sp = p->sas_ss_size = 0;

	/*
	 * Syscall tracing and stepping should be turned off in the
	 * child regardless of CLONE_PTRACE.
	 */
	user_disable_single_step(p);
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;
	p->exit_state = 0;

	/*
	 * Ok, make it visible to the rest of the system.
	 * We dont wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->thread_group);

	/* Now that the task is set up, run cgroup callbacks if
	 * necessary. We need to run them before the task is visible
	 * on the tasklist. */
	cgroup_fork_callbacks(p);
	cgroup_callbacks_done = 1;

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	/*
	 * Process group and session signals need to be delivered to just the
	 * parent before the fork or both the parent and the child after the
	 * fork. Restart if a signal comes in before we add the new process to
	 * it's process group.
	 * A fatal signal pending means that current will exit, so the new
	 * thread can't slip out of an OOM kill (or normal SIGKILL).
	 */
	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

	if (clone_flags & CLONE_THREAD) {
		current->signal->nr_threads++;
		atomic_inc(&current->signal->live);
		atomic_inc(&current->signal->sigcnt);
		p->group_leader = current->group_leader;
		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
	}
	if (likely(p->pid)) {
		tracehook_finish_clone(p, clone_flags, trace);

		if (thread_group_leader(p)) {
			if (is_child_reaper(pid))
				p->nsproxy->pid_ns->child_reaper = p;

			p->signal->leader_pid = pid;
			p->signal->tty = tty_kref_get(current->signal->tty);
			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
			attach_pid(p, PIDTYPE_SID, task_session(current));
			list_add_tail(&p->sibling, &p->real_parent->children);
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			__this_cpu_inc(process_counts);
		}
		attach_pid(p, PIDTYPE_PID, pid);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	perf_event_fork(p);
	return p;
bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_io:
	if (p->io_context)
		exit_io_context(p);
bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm) {
		task_lock(p);
		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
			atomic_dec(&p->mm->oom_disable_count);
		task_unlock(p);
		mmput(p->mm);
	}
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_policy:
	perf_event_free_task(p);
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
	cgroup_exit(p, cgroup_callbacks_done);
	delayacct_tsk_free(p);
	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
	atomic_dec(&p->cred->user->processes);
	exit_creds(p);
bad_fork_free:
	free_task(p);
fork_out:
	return ERR_PTR(retval);
}
noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
{
	memset(regs, 0, sizeof(struct pt_regs));
	return regs;
}

static inline void init_idle_pids(struct pid_link *links)
{
	enum pid_type type;

	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
		INIT_HLIST_NODE(&links[type].node); /* not really needed */
		links[type].pid = &init_struct_pid;
	}
}
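
/*
 * Fork the idle task for a CPU: copy_process() against init_struct_pid,
 * without waking the new task up.
 */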
struct task_struct * __cpuinit fork_idle(int cpu)
{
	struct task_struct *task;
	struct pt_regs regs;

	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
			    &init_struct_pid, 0);
	if (!IS_ERR(task)) {
		init_idle_pids(task->pids);
		init_idle(task, cpu);
	}

	return task;
}
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * When called from kernel_thread, don't do user tracing stuff.
	 */
	if (likely(user_mode(regs)))
		trace = tracehook_prepare_clone(clone_flags);

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		audit_finish_fork(p);
		tracehook_report_clone(regs, clone_flags, nr, p);

		/*
		 * We set PF_STARTING at creation in case tracing wants to
		 * use this to distinguish a fully live task from one that
		 * hasn't gotten to tracehook_report_clone() yet.  Now we
		 * clear it and set the child going.
		 */
		p->flags &= ~PF_STARTING;

		wake_up_new_task(p, clone_flags);

		tracehook_report_clone_complete(trace, regs,
						clone_flags, nr, p);

		if (clone_flags & CLONE_VFORK) {
			freezer_do_not_count();
			wait_for_completion(&vfork);
			freezer_count();
			tracehook_report_vfork_done(p, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif

static void sighand_ctor(void *data)
{
	struct sighand_struct *sighand = data;

	spin_lock_init(&sighand->siglock);
	init_waitqueue_head(&sighand->signalfd_wqh);
}
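
/*
 * Create the slab caches used by fork: sighand, signal, files, fs, mm and
 * vm_area_struct.
 */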
void __init proc_caches_init(void)
{
	sighand_cachep = kmem_cache_create("sighand_cache",
			sizeof(struct sighand_struct), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
			SLAB_NOTRACK, sighand_ctor);
	signal_cachep = kmem_cache_create("signal_cache",
			sizeof(struct signal_struct), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
	files_cachep = kmem_cache_create("files_cache",
			sizeof(struct files_struct), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
	fs_cachep = kmem_cache_create("fs_cache",
			sizeof(struct fs_struct), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
	mm_cachep = kmem_cache_create("mm_struct",
			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
	mmap_init();
}

/*
 * Check constraints on flags passed to the unshare system call.
 */
static int check_unshare_flags(unsigned long unshare_flags)
{
	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
		return -EINVAL;
	/*
	 * Not implemented, but pretend it works if there is nothing to
	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
	 * needs to unshare vm.
	 */
	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
		/* FIXME: get_task_mm() increments ->mm_users */
		if (atomic_read(&current->mm->mm_users) > 1)
			return -EINVAL;
	}

	return 0;
}

/*
 * Unshare the filesystem structure if it is being shared
 */
static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
{
	struct fs_struct *fs = current->fs;

	if (!(unshare_flags & CLONE_FS) || !fs)
		return 0;

	/* don't need lock here; in the worst case we'll do useless copy */
	if (fs->users == 1)
		return 0;

	*new_fsp = copy_fs_struct(fs);
	if (!*new_fsp)
		return -ENOMEM;

	return 0;
}

/*
 * Unshare file descriptor table if it is being shared
 */
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
{
	struct files_struct *fd = current->files;
	int error = 0;

	if ((unshare_flags & CLONE_FILES) &&
	    (fd && atomic_read(&fd->count) > 1)) {
		*new_fdp = dup_fd(fd, &error);
		if (!*new_fdp)
			return error;
	}

	return 0;
}

/*
 * unshare allows a process to 'unshare' part of the process
 * context which was originally shared using clone.  copy_*
 * functions used by do_fork() cannot be used here directly
 * because they modify an inactive task_struct that is being
 * constructed. Here we are modifying the current, active,
 * task_struct.
 */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
	struct fs_struct *fs, *new_fs = NULL;
	struct files_struct *fd, *new_fd = NULL;
	struct nsproxy *new_nsproxy = NULL;
	int do_sysvsem = 0;
	int err;

	err = check_unshare_flags(unshare_flags);
	if (err)
		goto bad_unshare_out;

	/*
	 * If unsharing namespace, must also unshare filesystem information.
	 */
	if (unshare_flags & CLONE_NEWNS)
		unshare_flags |= CLONE_FS;
	/*
	 * CLONE_NEWIPC must also detach from the undolist: after switching
	 * to a new ipc namespace, the semaphore arrays from the old
	 * namespace are unreachable.
	 */
	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
		do_sysvsem = 1;
	if ((err = unshare_fs(unshare_flags, &new_fs)))
		goto bad_unshare_out;
	if ((err = unshare_fd(unshare_flags, &new_fd)))
		goto bad_unshare_cleanup_fs;
	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
			new_fs)))
		goto bad_unshare_cleanup_fd;

	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
		if (do_sysvsem) {
			/*
			 * CLONE_SYSVSEM is equivalent to sys_exit().
			 */
			exit_sem(current);
		}

		if (new_nsproxy) {
			switch_task_namespaces(current, new_nsproxy);
			new_nsproxy = NULL;
		}

		task_lock(current);

		if (new_fs) {
			fs = current->fs;
			spin_lock(&fs->lock);
			current->fs = new_fs;
			if (--fs->users)
				new_fs = NULL;
			else
				new_fs = fs;
			spin_unlock(&fs->lock);
		}

		if (new_fd) {
			fd = current->files;
			current->files = new_fd;
			new_fd = fd;
		}

		task_unlock(current);
	}

	if (new_nsproxy)
		put_nsproxy(new_nsproxy);

bad_unshare_cleanup_fd:
	if (new_fd)
		put_files_struct(new_fd);

bad_unshare_cleanup_fs:
	if (new_fs)
		free_fs_struct(new_fs);

bad_unshare_out:
	return err;
}

/*
 *	Helper to unshare the files of the current task.
 *	We don't want to expose copy_files internals to
 *	the exec layer of the kernel.
 */
int unshare_files(struct files_struct **displaced)
{
	struct task_struct *task = current;
	struct files_struct *copy = NULL;
	int error;

	error = unshare_fd(CLONE_FILES, &copy);
	if (error || !copy) {
		*displaced = NULL;
		return error;
	}
	*displaced = task->files;
	task_lock(task);
	task->files = copy;
	task_unlock(task);
	return 0;
}