Blame view

kernel/fork.c 59.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/kernel/fork.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   *  'fork.c' contains the help-routines for the 'fork' system call
   * (see also entry.S and others).
   * Fork is rather simple, once you get the hang of it, but the memory
   * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
  #include <linux/slab.h>
4eb5aaa3a   Ingo Molnar   sched/headers: Pr...
14
  #include <linux/sched/autogroup.h>
6e84f3152   Ingo Molnar   sched/headers: Pr...
15
  #include <linux/sched/mm.h>
f7ccbae45   Ingo Molnar   sched/headers: Pr...
16
  #include <linux/sched/coredump.h>
8703e8a46   Ingo Molnar   sched/headers: Pr...
17
  #include <linux/sched/user.h>
6a3827d75   Ingo Molnar   sched/headers: Pr...
18
  #include <linux/sched/numa_balancing.h>
03441a348   Ingo Molnar   sched/headers: Pr...
19
  #include <linux/sched/stat.h>
299300258   Ingo Molnar   sched/headers: Pr...
20
  #include <linux/sched/task.h>
68db0cf10   Ingo Molnar   sched/headers: Pr...
21
  #include <linux/sched/task_stack.h>
32ef5517c   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/cputime.h>
037741a6d   Ingo Molnar   sched/headers: Pr...
23
  #include <linux/rtmutex.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
  #include <linux/init.h>
  #include <linux/unistd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/completion.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
30
31
32
  #include <linux/personality.h>
  #include <linux/mempolicy.h>
  #include <linux/sem.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
33
  #include <linux/fdtable.h>
da9cbc873   Jens Axboe   block: blkdev.h c...
34
  #include <linux/iocontext.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
36
37
  #include <linux/key.h>
  #include <linux/binfmts.h>
  #include <linux/mman.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
38
  #include <linux/mmu_notifier.h>
133ff0eac   Jérôme Glisse   mm/hmm: heterogen...
39
  #include <linux/hmm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  #include <linux/fs.h>
615d6e875   Davidlohr Bueso   mm: per-thread vm...
41
42
  #include <linux/mm.h>
  #include <linux/vmacache.h>
ab516013a   Serge E. Hallyn   [PATCH] namespace...
43
  #include <linux/nsproxy.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
44
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
  #include <linux/cpu.h>
b4f48b636   Paul Menage   Task Control Grou...
46
  #include <linux/cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47
  #include <linux/security.h>
a1e78772d   Mel Gorman   hugetlb: reserve ...
48
  #include <linux/hugetlb.h>
e2cfabdfd   Will Drewry   seccomp: add syst...
49
  #include <linux/seccomp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50
51
52
53
  #include <linux/swap.h>
  #include <linux/syscalls.h>
  #include <linux/jiffies.h>
  #include <linux/futex.h>
8141c7f3e   Linus Torvalds   Move "exit_robust...
54
  #include <linux/compat.h>
207205a2b   Eric Dumazet   kthread: NUMA awa...
55
  #include <linux/kthread.h>
7c3ab7381   Andrew Morton   [PATCH] io-accoun...
56
  #include <linux/task_io_accounting_ops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
57
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
58
59
60
  #include <linux/ptrace.h>
  #include <linux/mount.h>
  #include <linux/audit.h>
78fb74669   Pavel Emelianov   Memory controller...
61
  #include <linux/memcontrol.h>
f201ae235   Frederic Weisbecker   tracing/function-...
62
  #include <linux/ftrace.h>
5e2bf0142   Mike Galbraith   namespaces, pid_n...
63
  #include <linux/proc_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
64
65
  #include <linux/profile.h>
  #include <linux/rmap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
66
  #include <linux/ksm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67
  #include <linux/acct.h>
893e26e61   Pavel Emelyanov   userfaultfd: non-...
68
  #include <linux/userfaultfd_k.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
69
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
70
  #include <linux/cn_proc.h>
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
71
  #include <linux/freezer.h>
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
72
  #include <linux/delayacct.h>
ad4ecbcba   Shailabh Nagar   [PATCH] delay acc...
73
  #include <linux/taskstats_kern.h>
0a4254058   Arjan van de Ven   [PATCH] Add the c...
74
  #include <linux/random.h>
522ed7767   Miloslav Trmac   Audit: add TTY in...
75
  #include <linux/tty.h>
fd0928df9   Jens Axboe   ioprio: move io p...
76
  #include <linux/blkdev.h>
5ad4e53bd   Al Viro   Get rid of indire...
77
  #include <linux/fs_struct.h>
7c9f8861e   Eric Sandeen   stackprotector: u...
78
  #include <linux/magic.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
79
  #include <linux/perf_event.h>
42c4ab41a   Stanislaw Gruszka   itimers: Merge IT...
80
  #include <linux/posix-timers.h>
8e7cac798   Avi Kivity   core: Fix user re...
81
  #include <linux/user-return-notifier.h>
3d5992d2a   Ying Han   oom: add per-mm o...
82
  #include <linux/oom.h>
ba76149f4   Andrea Arcangeli   thp: khugepaged
83
  #include <linux/khugepaged.h>
d80e731ec   Oleg Nesterov   epoll: introduce ...
84
  #include <linux/signalfd.h>
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
85
  #include <linux/uprobes.h>
a27bb332c   Kent Overstreet   aio: don't includ...
86
  #include <linux/aio.h>
52f5684c8   Gideon Israel Dsouza   kernel: use macro...
87
  #include <linux/compiler.h>
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
88
  #include <linux/sysctl.h>
5c9a8750a   Dmitry Vyukov   kernel: add kcov ...
89
  #include <linux/kcov.h>
d83a7cb37   Josh Poimboeuf   livepatch: change...
90
  #include <linux/livepatch.h>
48ac3c18c   Mark Rutland   fork: allow arch-...
91
  #include <linux/thread_info.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
92
93
94
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
95
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
96
97
98
  #include <asm/mmu_context.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
ad8d75fff   Steven Rostedt   tracing/events: m...
99
  #include <trace/events/sched.h>
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
100
101
  #define CREATE_TRACE_POINTS
  #include <trace/events/task.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
102
  /*
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
103
104
105
106
107
108
109
110
111
112
   * Minimum number of threads to boot the kernel
   */
  #define MIN_THREADS 20
  
  /*
   * Maximum number of threads
   */
  #define MAX_THREADS FUTEX_TID_MASK
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
114
115
   * Counters protected by write_lock_irq(&tasklist_lock)
   */
  unsigned long total_forks;	/* Handle normal Linux uptimes. */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
116
  int nr_threads;			/* The idle threads do not count.. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
119
120
  
  int max_threads;		/* tunable limit on nr_threads */
  
  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
c59923a15   Christoph Hellwig   [PATCH] remove th...
121
  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
122
123
124
125
126
127
128
129
  
  #ifdef CONFIG_PROVE_RCU
  /* Report the result of lockdep_is_held() for tasklist_lock. */
  int lockdep_tasklist_lock_is_held(void)
  {
  	int held = lockdep_is_held(&tasklist_lock);
  
  	return held;
  }
  EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
  #endif /* #ifdef CONFIG_PROVE_RCU */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
130
131
132
133
134
  
  /*
   * Sum the per-CPU process_counts counters over every possible CPU and
   * return the total.
   */
  int nr_processes(void)
  {
  	int cpu;
  	int total = 0;
1d5107509   Ian Campbell   Correct nr_proces...
135
  	for_each_possible_cpu(cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
137
138
139
  		total += per_cpu(process_counts, cpu);
  
  	return total;
  }
f19b9f74b   Akinobu Mita   fork: fix error h...
140
141
142
  /*
   * Default (__weak) implementation with an empty body; free_task() calls
   * this hook before freeing the task_struct.
   * NOTE(review): presumably architectures override it to release
   * arch-specific task state -- confirm against the arch code.
   */
  void __weak arch_release_task_struct(struct task_struct *tsk)
  {
  }
f5e102873   Thomas Gleixner   task_allocator: U...
143
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
144
  static struct kmem_cache *task_struct_cachep;
41101809a   Thomas Gleixner   fork: Provide wea...
145
146
147
148
149
  
  /* Allocate a task_struct from task_struct_cachep for the given node. */
  static inline struct task_struct *alloc_task_struct_node(int node)
  {
  	struct task_struct *tsk;
  
  	tsk = kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
  	return tsk;
  }
41101809a   Thomas Gleixner   fork: Provide wea...
150
151
  /* Return @tsk to the task_struct_cachep slab cache. */
  static inline void free_task_struct(struct task_struct *tsk)
  {
41101809a   Thomas Gleixner   fork: Provide wea...
152
153
  	kmem_cache_free(task_struct_cachep, tsk);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
  #endif
b235beea9   Linus Torvalds   Clarify naming of...
155
  /*
   * Default (__weak) implementation with an empty body; release_task_stack()
   * calls this hook with tsk->stack before the stack is freed.
   * NOTE(review): presumably an architecture hook -- confirm which
   * architectures override it.
   */
  void __weak arch_release_thread_stack(unsigned long *stack)
f19b9f74b   Akinobu Mita   fork: fix error h...
156
157
  {
  }
b235beea9   Linus Torvalds   Clarify naming of...
158
  #ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
41101809a   Thomas Gleixner   fork: Provide wea...
159

0d15d74a1   Thomas Gleixner   fork: Provide kme...
160
161
162
163
  /*
   * Allocate pages (or, with CONFIG_VMAP_STACK, a vmalloc area) if
   * THREAD_SIZE is >= PAGE_SIZE or CONFIG_VMAP_STACK is set, otherwise
   * use a kmemcache based allocator.
   */
ba14a194a   Andy Lutomirski   fork: Add generic...
164
  # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
ac496bf48   Andy Lutomirski   fork: Optimize ta...
165
166
167
168
169
170
171
172
  
  #ifdef CONFIG_VMAP_STACK
  /*
   * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
   * flush.  Try to minimize the number of calls by caching stacks.
   */
  #define NR_CACHED_STACKS 2
  static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
19659c59a   Hoeun Ryu   fork: free vmappe...
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
  
  static int free_vm_stack_cache(unsigned int cpu)
  {
  	struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
  	int i;
  
  	for (i = 0; i < NR_CACHED_STACKS; i++) {
  		struct vm_struct *vm_stack = cached_vm_stacks[i];
  
  		if (!vm_stack)
  			continue;
  
  		vfree(vm_stack->addr);
  		cached_vm_stacks[i] = NULL;
  	}
  
  	return 0;
  }
ac496bf48   Andy Lutomirski   fork: Optimize ta...
191
  #endif
ba14a194a   Andy Lutomirski   fork: Add generic...
192
  /*
   * Allocate a kernel stack for @tsk on NUMA node @node.
   *
   * With CONFIG_VMAP_STACK: take the first non-NULL entry from this CPU's
   * cached_stacks[] (zeroing it before reuse), otherwise allocate THREAD_SIZE
   * bytes with __vmalloc_node_range().  On success tsk->stack_vm_area is set.
   * Without CONFIG_VMAP_STACK: make an order-THREAD_SIZE_ORDER page
   * allocation with alloc_pages_node().
   *
   * Returns the stack address, or NULL if the allocation failed.
   */
  static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
b69c49b78   FUJITA Tomonori   clean up duplicat...
193
  {
ba14a194a   Andy Lutomirski   fork: Add generic...
194
  #ifdef CONFIG_VMAP_STACK
ac496bf48   Andy Lutomirski   fork: Optimize ta...
195
196
  	void *stack;
  	int i;
ac496bf48   Andy Lutomirski   fork: Optimize ta...
197
  	/* First try to claim a stack cached on this CPU. */
  	for (i = 0; i < NR_CACHED_STACKS; i++) {
112166f88   Christoph Lameter   kernel/fork.c: vi...
198
199
200
  		struct vm_struct *s;
  
  		s = this_cpu_xchg(cached_stacks[i], NULL);
ac496bf48   Andy Lutomirski   fork: Optimize ta...
201
202
203
  
  		if (!s)
  			continue;
ac496bf48   Andy Lutomirski   fork: Optimize ta...
204

ca1825518   Konstantin Khlebnikov   kmemleak: clear s...
205
206
  		/* Clear stale pointers from reused stack. */
  		memset(s->addr, 0, THREAD_SIZE);
2d5fc7ffa   Kees Cook   fork: uncondition...
207

ac496bf48   Andy Lutomirski   fork: Optimize ta...
208
  		tsk->stack_vm_area = s;
ac496bf48   Andy Lutomirski   fork: Optimize ta...
209
210
  		return s->addr;
  	}
ac496bf48   Andy Lutomirski   fork: Optimize ta...
211

48ac3c18c   Mark Rutland   fork: allow arch-...
212
  	/* Nothing cached: allocate a new area. */
  	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
ac496bf48   Andy Lutomirski   fork: Optimize ta...
213
  				     VMALLOC_START, VMALLOC_END,
19809c2da   Michal Hocko   mm, vmalloc: use ...
214
  				     THREADINFO_GFP,
ac496bf48   Andy Lutomirski   fork: Optimize ta...
215
216
  				     PAGE_KERNEL,
  				     0, node, __builtin_return_address(0));
ba14a194a   Andy Lutomirski   fork: Add generic...
217
218
219
220
221
222
223
224
225
226
  
  	/*
  	 * We can't call find_vm_area() in interrupt context, and
  	 * free_thread_stack() can be called in interrupt context,
  	 * so cache the vm_struct.
  	 */
  	if (stack)
  		tsk->stack_vm_area = find_vm_area(stack);
  	return stack;
  #else
4949148ad   Vladimir Davydov   mm: charge/unchar...
227
228
  	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
  					     THREAD_SIZE_ORDER);
b6a84016b   Eric Dumazet   mm: NUMA aware al...
229
230
  
  	return page ? page_address(page) : NULL;
ba14a194a   Andy Lutomirski   fork: Add generic...
231
  #endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
232
  }
ba14a194a   Andy Lutomirski   fork: Add generic...
233
  /*
   * Free the kernel stack of @tsk.
   *
   * With CONFIG_VMAP_STACK, if the task has a stack vm area: store
   * tsk->stack_vm_area in the first empty slot of this CPU's cached_stacks[];
   * if every slot is occupied, release the stack with vfree_atomic().
   * Otherwise the stack pages are released with __free_pages().
   */
  static inline void free_thread_stack(struct task_struct *tsk)
b69c49b78   FUJITA Tomonori   clean up duplicat...
234
  {
ac496bf48   Andy Lutomirski   fork: Optimize ta...
235
236
  #ifdef CONFIG_VMAP_STACK
  	if (task_stack_vm_area(tsk)) {
ac496bf48   Andy Lutomirski   fork: Optimize ta...
237
  		int i;
ac496bf48   Andy Lutomirski   fork: Optimize ta...
238
  		for (i = 0; i < NR_CACHED_STACKS; i++) {
112166f88   Christoph Lameter   kernel/fork.c: vi...
239
240
  			/* Slot already occupied: try the next one. */
  			if (this_cpu_cmpxchg(cached_stacks[i],
  					NULL, tsk->stack_vm_area) != NULL)
ac496bf48   Andy Lutomirski   fork: Optimize ta...
241
  				continue;
ac496bf48   Andy Lutomirski   fork: Optimize ta...
242
243
  			return;
  		}
ac496bf48   Andy Lutomirski   fork: Optimize ta...
244

0f110a9b9   Andrey Ryabinin   kernel/fork: use ...
245
  		vfree_atomic(tsk->stack);
ac496bf48   Andy Lutomirski   fork: Optimize ta...
246
247
248
249
250
  		return;
  	}
  #endif
  
  	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
b69c49b78   FUJITA Tomonori   clean up duplicat...
251
  }
0d15d74a1   Thomas Gleixner   fork: Provide kme...
252
  # else
b235beea9   Linus Torvalds   Clarify naming of...
253
  static struct kmem_cache *thread_stack_cache;
0d15d74a1   Thomas Gleixner   fork: Provide kme...
254

9521d3997   Michael Ellerman   Fix build break i...
255
  /*
   * kmem_cache variant: allocate a stack from thread_stack_cache on @node.
   * @tsk is not used here.  Returns the result of kmem_cache_alloc_node().
   */
  static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
0d15d74a1   Thomas Gleixner   fork: Provide kme...
256
257
  						  int node)
  {
b235beea9   Linus Torvalds   Clarify naming of...
258
  	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
0d15d74a1   Thomas Gleixner   fork: Provide kme...
259
  }
ba14a194a   Andy Lutomirski   fork: Add generic...
260
  /* kmem_cache variant: return tsk->stack to thread_stack_cache. */
  static void free_thread_stack(struct task_struct *tsk)
0d15d74a1   Thomas Gleixner   fork: Provide kme...
261
  {
ba14a194a   Andy Lutomirski   fork: Add generic...
262
  	kmem_cache_free(thread_stack_cache, tsk->stack);
0d15d74a1   Thomas Gleixner   fork: Provide kme...
263
  }
b235beea9   Linus Torvalds   Clarify naming of...
264
  /*
   * Create the "thread_stack" slab cache with THREAD_SIZE passed as both the
   * object size and the alignment.  BUG()s if the cache cannot be created.
   */
  void thread_stack_cache_init(void)
0d15d74a1   Thomas Gleixner   fork: Provide kme...
265
  {
b235beea9   Linus Torvalds   Clarify naming of...
266
  	thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
0d15d74a1   Thomas Gleixner   fork: Provide kme...
267
  					      THREAD_SIZE, 0, NULL);
b235beea9   Linus Torvalds   Clarify naming of...
268
  	BUG_ON(thread_stack_cache == NULL);
0d15d74a1   Thomas Gleixner   fork: Provide kme...
269
270
  }
  # endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
271
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272
  /* SLAB cache for signal_struct structures (tsk->signal) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
273
  static struct kmem_cache *signal_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
274
275
  
  /* SLAB cache for sighand_struct structures (tsk->sighand) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
276
  struct kmem_cache *sighand_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277
278
  
  /* SLAB cache for files_struct structures (tsk->files) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
279
  struct kmem_cache *files_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
  
  /* SLAB cache for fs_struct structures (tsk->fs) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
282
  struct kmem_cache *fs_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283
284
  
  /* SLAB cache for vm_area_struct structures */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
285
  struct kmem_cache *vm_area_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
287
  
  /* SLAB cache for mm_struct structures (tsk->mm) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
288
  static struct kmem_cache *mm_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
289

ba14a194a   Andy Lutomirski   fork: Add generic...
290
  /*
   * Adjust the kernel-stack accounting for the stack of @tsk.  @account is
   * the multiplier applied to the stack size in KB: callers in this file pass
   * 1 when a stack is set up and -1 when it is released.  Both the
   * NR_KERNEL_STACK_KB zone counter and the MEMCG_KERNEL_STACK_KB memcg
   * counter are updated.
   */
  static void account_kernel_stack(struct task_struct *tsk, int account)
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
291
  {
ba14a194a   Andy Lutomirski   fork: Add generic...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
  	void *stack = task_stack_page(tsk);
  	struct vm_struct *vm = task_stack_vm_area(tsk);
  
  	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
  
  	if (vm) {
  		int i;
  
  		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
  
  		/* Account each page against the zone it belongs to. */
  		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
  			mod_zone_page_state(page_zone(vm->pages[i]),
  					    NR_KERNEL_STACK_KB,
  					    PAGE_SIZE / 1024 * account);
  		}
  
  		/* All stack pages belong to the same memcg. */
ed52be7bf   Johannes Weiner   mm: memcontrol: u...
309
310
  		mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
  				     account * (THREAD_SIZE / 1024));
ba14a194a   Andy Lutomirski   fork: Add generic...
311
312
313
314
315
316
317
318
319
  	} else {
  		/*
  		 * All stack pages are in the same zone and belong to the
  		 * same memcg.
  		 */
  		struct page *first_page = virt_to_page(stack);
  
  		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
  				    THREAD_SIZE / 1024 * account);
ed52be7bf   Johannes Weiner   mm: memcontrol: u...
320
321
  		mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
  				     account * (THREAD_SIZE / 1024));
ba14a194a   Andy Lutomirski   fork: Add generic...
322
  	}
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
323
  }
68f24b08e   Andy Lutomirski   sched/core: Free ...
324
  /*
   * Release the kernel stack of @tsk: undo its accounting, call
   * arch_release_thread_stack(), free the stack and clear the stack pointers
   * in the task.  If the task is not in TASK_DEAD state this warns and
   * returns without freeing anything.
   */
  static void release_task_stack(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
  {
405c07597   Andy Lutomirski   fork: Add task st...
326
327
  	if (WARN_ON(tsk->state != TASK_DEAD))
  		return;  /* Better to leak the stack than to free prematurely */
ba14a194a   Andy Lutomirski   fork: Add generic...
328
  	account_kernel_stack(tsk, -1);
b235beea9   Linus Torvalds   Clarify naming of...
329
  	arch_release_thread_stack(tsk->stack);
ba14a194a   Andy Lutomirski   fork: Add generic...
330
  	free_thread_stack(tsk);
68f24b08e   Andy Lutomirski   sched/core: Free ...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
  	tsk->stack = NULL;
  #ifdef CONFIG_VMAP_STACK
  	tsk->stack_vm_area = NULL;
  #endif
  }
  
  #ifdef CONFIG_THREAD_INFO_IN_TASK
  /*
   * Drop one reference on the stack of @tsk and release the stack once the
   * reference count reaches zero.
   */
  void put_task_stack(struct task_struct *tsk)
  {
  	if (!atomic_dec_and_test(&tsk->stack_refcount))
  		return;
  
  	release_task_stack(tsk);
  }
  #endif
  
  /*
   * Free @tsk.  Without CONFIG_THREAD_INFO_IN_TASK the stack is released
   * here; with it, the stack is expected to be gone already and a non-zero
   * stack_refcount only triggers a one-time warning.  The rt-mutex debug,
   * ftrace graph, seccomp and arch hooks are then run, the kthread struct is
   * freed for PF_KTHREAD tasks, and finally the task_struct itself is freed.
   */
  void free_task(struct task_struct *tsk)
  {
  #ifndef CONFIG_THREAD_INFO_IN_TASK
  	/*
  	 * The task is finally done with both the stack and thread_info,
  	 * so free both.
  	 */
  	release_task_stack(tsk);
  #else
  	/*
  	 * If the task had a separate stack allocation, it should be gone
  	 * by now.
  	 */
  	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
  #endif
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
360
  	rt_mutex_debug_task_free(tsk);
fb52607af   Frederic Weisbecker   tracing/function-...
361
  	ftrace_graph_exit_task(tsk);
e2cfabdfd   Will Drewry   seccomp: add syst...
362
  	put_seccomp_filter(tsk);
f19b9f74b   Akinobu Mita   fork: fix error h...
363
  	arch_release_task_struct(tsk);
1da5c46fa   Oleg Nesterov   kthread: Make str...
364
365
  	if (tsk->flags & PF_KTHREAD)
  		free_kthread_struct(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
366
367
368
  	free_task_struct(tsk);
  }
  EXPORT_SYMBOL(free_task);
ea6d290ca   Oleg Nesterov   signals: make tas...
369
370
  /*
   * Tear down @sig: run the taskstats and autogroup exit hooks, drop the
   * oom_mm reference asynchronously if one is held, and return the structure
   * to signal_cachep.
   */
  static inline void free_signal_struct(struct signal_struct *sig)
  {
97101eb41   Oleg Nesterov   exit: move taskst...
371
  	taskstats_tgid_free(sig);
1c5354de9   Mike Galbraith   sched: Move sched...
372
  	sched_autogroup_exit(sig);
7283094ec   Michal Hocko   kernel, oom: fix ...
373
374
375
376
  	/*
  	 * __mmdrop is not safe to call from softirq context on x86 due to
  	 * pgd_dtor so postpone it to the async context
  	 */
26db62f17   Michal Hocko   oom: keep mm of t...
377
  	if (sig->oom_mm)
7283094ec   Michal Hocko   kernel, oom: fix ...
378
  		mmdrop_async(sig->oom_mm);
ea6d290ca   Oleg Nesterov   signals: make tas...
379
380
381
382
383
  	kmem_cache_free(signal_cachep, sig);
  }
  
  /* Drop one sigcnt reference on @sig and free it when the count hits zero. */
  static inline void put_signal_struct(struct signal_struct *sig)
  {
1c5354de9   Mike Galbraith   sched: Move sched...
384
  	if (atomic_dec_and_test(&sig->sigcnt))
ea6d290ca   Oleg Nesterov   signals: make tas...
385
386
  		free_signal_struct(sig);
  }
158d9ebd1   Andrew Morton   [PATCH] resurrect...
387
  /*
   * Release what is still attached to @tsk (cgroup, NUMA, security, creds,
   * delay accounting and the signal struct reference) and then free the task,
   * unless profile_handoff_task() returns non-zero.  Warns if exit_state is
   * not set, if the usage count is non-zero, or if @tsk is current.
   */
  void __put_task_struct(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
388
  {
270f722d4   Eugene Teo   Fix tsk->exit_sta...
389
  	WARN_ON(!tsk->exit_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
390
391
  	WARN_ON(atomic_read(&tsk->usage));
  	WARN_ON(tsk == current);
2e91fa7f6   Tejun Heo   cgroup: keep zomb...
392
  	cgroup_free(tsk);
156654f49   Mike Galbraith   sched/numa: Move ...
393
  	task_numa_free(tsk);
1a2a4d06e   Kees Cook   security: create ...
394
  	security_task_free(tsk);
e0e817392   David Howells   CRED: Add some co...
395
  	exit_creds(tsk);
35df17c57   Shailabh Nagar   [PATCH] task dela...
396
  	delayacct_tsk_free(tsk);
ea6d290ca   Oleg Nesterov   signals: make tas...
397
  	put_signal_struct(tsk->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
398
399
400
401
  
  	if (!profile_handoff_task(tsk))
  		free_task(tsk);
  }
77c100c83   Rik van Riel   export pid symbol...
402
  EXPORT_SYMBOL_GPL(__put_task_struct);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
403

6c0a9fa62   Thomas Gleixner   fork: Remove the ...
404
  void __init __weak arch_task_cache_init(void) { }
61c4628b5   Suresh Siddha   x86, fpu: split F...
405

ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
406
407
408
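  /*
   * set_max_threads - derive max_threads from the amount of RAM, capped by
   * the caller's suggestion and clamped to [MIN_THREADS, MAX_THREADS]
   */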
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
409
  static void set_max_threads(unsigned int max_threads_suggested)
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
410
  {
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
411
  	u64 threads;
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
412
413
  
  	/*
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
414
415
  	 * The number of threads shall be limited such that the thread
  	 * structures may only consume a small part of the available memory.
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
416
  	 */
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
417
418
419
420
421
  	/* Skip the product below when it could overflow 64 bits. */
  	if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
  		threads = MAX_THREADS;
  	else
  		threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
  				    (u64) THREAD_SIZE * 8UL);
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
422
423
  	/* Never exceed what the caller suggested. */
  	if (threads > max_threads_suggested)
  		threads = max_threads_suggested;
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
424
  	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
425
  }
5aaeb5c01   Ingo Molnar   x86/fpu, sched: I...
426
427
428
429
  #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
  /* Initialized by the architecture: */
  int arch_task_struct_size __read_mostly;
  #endif
0c8c0f03e   Dave Hansen   x86/fpu, sched: D...
430

ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
431
  /*
   * Init-time setup for fork: create the task_struct slab cache (unless
   * CONFIG_ARCH_TASK_STRUCT_ALLOCATOR is set), run arch_task_cache_init(),
   * compute max_threads, and seed init_task's RLIMIT_NPROC and
   * RLIMIT_SIGPENDING limits and every init_user_ns.ucount_max[] entry with
   * max_threads/2.  With CONFIG_VMAP_STACK, free_vm_stack_cache is also
   * passed to cpuhp_setup_state().
   */
  void __init fork_init(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432
  {
25f9c0817   Eric W. Biederman   userns: Generaliz...
433
  	int i;
f5e102873   Thomas Gleixner   task_allocator: U...
434
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
  #ifndef ARCH_MIN_TASKALIGN
e274795ea   Peter Zijlstra   locking/mutex: Fi...
436
  #define ARCH_MIN_TASKALIGN	0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
437
  #endif
95cb64c1f   Peter Zijlstra   fork: Fix task_st...
438
  	int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
e274795ea   Peter Zijlstra   locking/mutex: Fi...
439

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440
  	/* create a slab on which task_structs can be allocated */
5d097056c   Vladimir Davydov   kmemcg: account c...
441
  	task_struct_cachep = kmem_cache_create("task_struct",
e274795ea   Peter Zijlstra   locking/mutex: Fi...
442
  			arch_task_struct_size, align,
ae63fd26b   Levin, Alexander (Sasha Levin)   kmemcheck: stop u...
443
  			SLAB_PANIC|SLAB_ACCOUNT, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
  #endif
61c4628b5   Suresh Siddha   x86, fpu: split F...
445
446
  	/* do the arch specific task caches init */
  	arch_task_cache_init();
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
447
  	set_max_threads(MAX_THREADS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
448
449
450
451
452
  
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
  		init_task.signal->rlim[RLIMIT_NPROC];
b376c3e1b   Eric W. Biederman   userns: Add a lim...
453

25f9c0817   Eric W. Biederman   userns: Generaliz...
454
455
456
  	for (i = 0; i < UCOUNT_COUNTS; i++) {
  		init_user_ns.ucount_max[i] = max_threads/2;
  	}
19659c59a   Hoeun Ryu   fork: free vmappe...
457
458
459
460
461
  
  #ifdef CONFIG_VMAP_STACK
  	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
  			  NULL, free_vm_stack_cache);
  #endif
b09be676e   Byungchul Park   locking/lockdep: ...
462
463
  
  	lockdep_init_task(&init_task);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
  }
52f5684c8   Gideon Israel Dsouza   kernel: use macro...
465
  /*
   * Default (__weak) implementation: copy *src into *dst by structure
   * assignment and return 0.
   */
  int __weak arch_dup_task_struct(struct task_struct *dst,
61c4628b5   Suresh Siddha   x86, fpu: split F...
466
467
468
469
470
  					       struct task_struct *src)
  {
  	*dst = *src;
  	return 0;
  }
d4311ff1a   Aaron Tomlin   init/main.c: Give...
471
472
473
474
475
476
477
  void set_task_stack_end_magic(struct task_struct *tsk)
  {
  	unsigned long *stackend;
  
  	stackend = end_of_stack(tsk);
  	*stackend = STACK_END_MAGIC;	/* for overflow detection */
  }
725fc629f   Andi Kleen   kernek/fork.c: al...
478
  /*
   * Allocate a new task_struct and kernel stack on @node (or on the node
   * returned by tsk_fork_get_node(@orig) when @node is NUMA_NO_NODE) and copy
   * @orig into it with arch_dup_task_struct().  The stack-related fields are
   * restored after the copy and a number of per-task fields are reset for the
   * new task.
   *
   * Returns the new task with its usage count set to 2, or NULL if an
   * allocation or the arch copy fails (whatever was allocated is freed).
   */
  static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479
480
  {
  	struct task_struct *tsk;
b235beea9   Linus Torvalds   Clarify naming of...
481
  	unsigned long *stack;
ba14a194a   Andy Lutomirski   fork: Add generic...
482
  	struct vm_struct *stack_vm_area;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
483
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
484

725fc629f   Andi Kleen   kernek/fork.c: al...
485
486
  	if (node == NUMA_NO_NODE)
  		node = tsk_fork_get_node(orig);
504f52b54   Eric Dumazet   mm: NUMA aware al...
487
  	tsk = alloc_task_struct_node(node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
488
489
  	if (!tsk)
  		return NULL;
b235beea9   Linus Torvalds   Clarify naming of...
490
491
  	stack = alloc_thread_stack_node(tsk, node);
  	if (!stack)
f19b9f74b   Akinobu Mita   fork: fix error h...
492
  		goto free_tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
493

ba14a194a   Andy Lutomirski   fork: Add generic...
494
  	stack_vm_area = task_stack_vm_area(tsk);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
495
  	err = arch_dup_task_struct(tsk, orig);
ba14a194a   Andy Lutomirski   fork: Add generic...
496
497
498
499
500
501
502
503
504
505
  
  	/*
  	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
  	 * sure they're properly initialized before using any stack-related
  	 * functions again.
  	 */
  	tsk->stack = stack;
  #ifdef CONFIG_VMAP_STACK
  	tsk->stack_vm_area = stack_vm_area;
  #endif
68f24b08e   Andy Lutomirski   sched/core: Free ...
506
507
508
  #ifdef CONFIG_THREAD_INFO_IN_TASK
  	atomic_set(&tsk->stack_refcount, 1);
  #endif
ba14a194a   Andy Lutomirski   fork: Add generic...
509

164c33c6a   Salman Qazi   sched: Fix fork()...
510
  	if (err)
b235beea9   Linus Torvalds   Clarify naming of...
511
  		goto free_stack;
164c33c6a   Salman Qazi   sched: Fix fork()...
512

dbd952127   Kees Cook   seccomp: introduc...
513
514
515
516
517
518
519
520
521
  #ifdef CONFIG_SECCOMP
  	/*
  	 * We must handle setting up seccomp filters once we're under
  	 * the sighand lock in case orig has changed between now and
  	 * then. Until then, filter must be NULL to avoid messing up
  	 * the usage counts on the error path calling free_task.
  	 */
  	tsk->seccomp.filter = NULL;
  #endif
87bec58a5   Andrew Morton   revert "sched: Fi...
522
523
  
  	setup_thread_stack(tsk, orig);
8e7cac798   Avi Kivity   core: Fix user re...
524
  	clear_user_return_notifier(tsk);
f26f9aff6   Mike Galbraith   Sched: fix skip_c...
525
  	clear_tsk_need_resched(tsk);
d4311ff1a   Aaron Tomlin   init/main.c: Give...
526
  	set_task_stack_end_magic(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
527

0a4254058   Arjan van de Ven   [PATCH] Add the c...
528
  #ifdef CONFIG_CC_STACKPROTECTOR
7cd815bce   Rik van Riel   fork,random: use ...
529
  	tsk->stack_canary = get_random_canary();
0a4254058   Arjan van de Ven   [PATCH] Add the c...
530
  #endif
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
531
532
533
534
535
  	/*
  	 * One for us, one for whoever does the "release_task()" (usually
  	 * parent)
  	 */
  	atomic_set(&tsk->usage, 2);
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
536
  #ifdef CONFIG_BLK_DEV_IO_TRACE
2056a782f   Jens Axboe   [PATCH] Block que...
537
  	tsk->btrace_seq = 0;
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
538
  #endif
a0aa7f68a   Jens Axboe   [PATCH] Don't inh...
539
  	tsk->splice_pipe = NULL;
5640f7685   Eric Dumazet   net: use a per ta...
540
  	tsk->task_frag.page = NULL;
093e5840a   Sebastian Andrzej Siewior   sched/core: Reset...
541
  	tsk->wake_q.next = NULL;
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
542

ba14a194a   Andy Lutomirski   fork: Add generic...
543
  	account_kernel_stack(tsk, 1);
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
544

5c9a8750a   Dmitry Vyukov   kernel: add kcov ...
545
  	kcov_task_init(tsk);
e41d58185   Dmitry Vyukov   fault-inject: sup...
546
547
548
  #ifdef CONFIG_FAULT_INJECTION
  	tsk->fail_nth = 0;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
  	return tsk;
61c4628b5   Suresh Siddha   x86, fpu: split F...
550

b235beea9   Linus Torvalds   Clarify naming of...
551
  free_stack:
ba14a194a   Andy Lutomirski   fork: Add generic...
552
  	free_thread_stack(tsk);
f19b9f74b   Akinobu Mita   fork: fix error h...
553
  free_tsk:
61c4628b5   Suresh Siddha   x86, fpu: split F...
554
555
  	free_task_struct(tsk);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
556
557
558
  }
  
  #ifdef CONFIG_MMU
0766f788e   Emese Revfy   latent_entropy: M...
559
560
  static __latent_entropy int dup_mmap(struct mm_struct *mm,
  					struct mm_struct *oldmm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
  {
297c5eee3   Linus Torvalds   mm: make the vma ...
562
  	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563
564
565
  	struct rb_node **rb_link, *rb_parent;
  	int retval;
  	unsigned long charge;
893e26e61   Pavel Emelyanov   userfaultfd: non-...
566
  	LIST_HEAD(uf);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567

32cdba1e0   Oleg Nesterov   uprobes: Use perc...
568
  	uprobe_start_dup_mmap();
7c0512679   Michal Hocko   mm, fork: make du...
569
570
571
572
  	if (down_write_killable(&oldmm->mmap_sem)) {
  		retval = -EINTR;
  		goto fail_uprobe_end;
  	}
ec8c0446b   Ralf Baechle   [PATCH] Optimize ...
573
  	flush_cache_dup_mm(oldmm);
f8ac4ec9c   Oleg Nesterov   uprobes: Introduc...
574
  	uprobe_dup_mmap(oldmm, mm);
ad3394517   Ingo Molnar   [PATCH] lockdep: ...
575
576
577
578
  	/*
  	 * Not linked in yet - no deadlock potential:
  	 */
  	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
579

90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
580
581
  	/* No ordering required: file already has been exposed. */
  	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
4f7d46143   Vladimir Davydov   fork: copy mm's v...
582
  	mm->total_vm = oldmm->total_vm;
846383359   Konstantin Khlebnikov   mm: rework virtua...
583
  	mm->data_vm = oldmm->data_vm;
4f7d46143   Vladimir Davydov   fork: copy mm's v...
584
585
  	mm->exec_vm = oldmm->exec_vm;
  	mm->stack_vm = oldmm->stack_vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
586
587
588
  	rb_link = &mm->mm_rb.rb_node;
  	rb_parent = NULL;
  	pprev = &mm->mmap;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
589
590
591
  	retval = ksm_fork(mm, oldmm);
  	if (retval)
  		goto out;
ba76149f4   Andrea Arcangeli   thp: khugepaged
592
593
594
  	retval = khugepaged_fork(mm, oldmm);
  	if (retval)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
595

297c5eee3   Linus Torvalds   mm: make the vma ...
596
  	prev = NULL;
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
597
  	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
598
599
600
  		struct file *file;
  
  		if (mpnt->vm_flags & VM_DONTCOPY) {
846383359   Konstantin Khlebnikov   mm: rework virtua...
601
  			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602
603
604
605
  			continue;
  		}
  		charge = 0;
  		if (mpnt->vm_flags & VM_ACCOUNT) {
b2412b7fa   Huang Shijie   fork: use vma_pag...
606
  			unsigned long len = vma_pages(mpnt);
191c54244   Al Viro   mm: collapse secu...
607
  			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
608
609
610
  				goto fail_nomem;
  			charge = len;
  		}
e94b17660   Christoph Lameter   [PATCH] slab: rem...
611
  		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
612
613
614
  		if (!tmp)
  			goto fail_nomem;
  		*tmp = *mpnt;
5beb49305   Rik van Riel   mm: change anon_v...
615
  		INIT_LIST_HEAD(&tmp->anon_vma_chain);
ef0855d33   Oleg Nesterov   mm: mempolicy: tu...
616
617
  		retval = vma_dup_policy(mpnt, tmp);
  		if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
618
  			goto fail_nomem_policy;
a247c3a97   Andrea Arcangeli   rmap: fix walk du...
619
  		tmp->vm_mm = mm;
893e26e61   Pavel Emelyanov   userfaultfd: non-...
620
621
622
  		retval = dup_userfaultfd(tmp, &uf);
  		if (retval)
  			goto fail_nomem_anon_vma_fork;
d2cd9ede6   Rik van Riel   mm,fork: introduc...
623
624
625
626
627
628
  		if (tmp->vm_flags & VM_WIPEONFORK) {
  			/* VM_WIPEONFORK gets a clean slate in the child. */
  			tmp->anon_vma = NULL;
  			if (anon_vma_prepare(tmp))
  				goto fail_nomem_anon_vma_fork;
  		} else if (anon_vma_fork(tmp, mpnt))
5beb49305   Rik van Riel   mm: change anon_v...
629
  			goto fail_nomem_anon_vma_fork;
893e26e61   Pavel Emelyanov   userfaultfd: non-...
630
  		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
297c5eee3   Linus Torvalds   mm: make the vma ...
631
  		tmp->vm_next = tmp->vm_prev = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
632
633
  		file = tmp->vm_file;
  		if (file) {
496ad9aa8   Al Viro   new helper: file_...
634
  			struct inode *inode = file_inode(file);
b88ed2059   Hugh Dickins   fix mapping_writa...
635
  			struct address_space *mapping = file->f_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
636
637
638
  			get_file(file);
  			if (tmp->vm_flags & VM_DENYWRITE)
  				atomic_dec(&inode->i_writecount);
83cde9e8b   Davidlohr Bueso   mm: use new helpe...
639
  			i_mmap_lock_write(mapping);
b88ed2059   Hugh Dickins   fix mapping_writa...
640
  			if (tmp->vm_flags & VM_SHARED)
4bb5f5d93   David Herrmann   mm: allow drivers...
641
  				atomic_inc(&mapping->i_mmap_writable);
b88ed2059   Hugh Dickins   fix mapping_writa...
642
643
  			flush_dcache_mmap_lock(mapping);
  			/* insert tmp into the share list, just after mpnt */
27ba0644e   Kirill A. Shutemov   rmap: drop suppor...
644
645
  			vma_interval_tree_insert_after(tmp, mpnt,
  					&mapping->i_mmap);
b88ed2059   Hugh Dickins   fix mapping_writa...
646
  			flush_dcache_mmap_unlock(mapping);
83cde9e8b   Davidlohr Bueso   mm: use new helpe...
647
  			i_mmap_unlock_write(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
648
649
650
  		}
  
  		/*
a1e78772d   Mel Gorman   hugetlb: reserve ...
651
652
653
654
655
656
657
658
  		 * Clear hugetlb-related page reserves for children. This only
  		 * affects MAP_PRIVATE mappings. Faults generated by the child
  		 * are not guaranteed to succeed, even if read-only
  		 */
  		if (is_vm_hugetlb_page(tmp))
  			reset_vma_resv_huge_pages(tmp);
  
  		/*
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
659
  		 * Link in the new vma and copy the page table entries.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
660
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
661
662
  		*pprev = tmp;
  		pprev = &tmp->vm_next;
297c5eee3   Linus Torvalds   mm: make the vma ...
663
664
  		tmp->vm_prev = prev;
  		prev = tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
665
666
667
668
669
670
  
  		__vma_link_rb(mm, tmp, rb_link, rb_parent);
  		rb_link = &tmp->vm_rb.rb_right;
  		rb_parent = &tmp->vm_rb;
  
  		mm->map_count++;
d2cd9ede6   Rik van Riel   mm,fork: introduc...
671
672
  		if (!(tmp->vm_flags & VM_WIPEONFORK))
  			retval = copy_page_range(mm, oldmm, mpnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
673
674
675
676
677
678
679
  
  		if (tmp->vm_ops && tmp->vm_ops->open)
  			tmp->vm_ops->open(tmp);
  
  		if (retval)
  			goto out;
  	}
d6dd61c83   Jeremy Fitzhardinge   [PATCH] x86: PARA...
680
  	/* a new mm has just been created */
ee8e8b2df   Thomas Gleixner   arch, mm: Allow a...
681
  	retval = arch_dup_mmap(oldmm, mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
682
  out:
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
683
  	up_write(&mm->mmap_sem);
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
684
  	flush_tlb_mm(oldmm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
685
  	up_write(&oldmm->mmap_sem);
893e26e61   Pavel Emelyanov   userfaultfd: non-...
686
  	dup_userfaultfd_complete(&uf);
7c0512679   Michal Hocko   mm, fork: make du...
687
  fail_uprobe_end:
32cdba1e0   Oleg Nesterov   uprobes: Use perc...
688
  	uprobe_end_dup_mmap();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
689
  	return retval;
5beb49305   Rik van Riel   mm: change anon_v...
690
  fail_nomem_anon_vma_fork:
ef0855d33   Oleg Nesterov   mm: mempolicy: tu...
691
  	mpol_put(vma_policy(tmp));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
692
693
694
695
696
697
698
  fail_nomem_policy:
  	kmem_cache_free(vm_area_cachep, tmp);
  fail_nomem:
  	retval = -ENOMEM;
  	vm_unacct_memory(charge);
  	goto out;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
699
  static inline int mm_alloc_pgd(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
700
701
702
703
704
705
  {
  	mm->pgd = pgd_alloc(mm);
  	if (unlikely(!mm->pgd))
  		return -ENOMEM;
  	return 0;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
706
  static inline void mm_free_pgd(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
707
  {
5e5419734   Benjamin Herrenschmidt   add mm argument t...
708
  	pgd_free(mm, mm->pgd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
709
710
  }
  #else
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
711
712
713
714
715
716
717
  /*
   * !CONFIG_MMU variant: the only state carried over from @oldmm is a
   * reference to its executable file, taken while oldmm->mmap_sem is held
   * for writing.  Always returns 0.
   */
  static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
  {
  	struct file *exe_file;

  	down_write(&oldmm->mmap_sem);
  	exe_file = get_mm_exe_file(oldmm);
  	RCU_INIT_POINTER(mm->exe_file, exe_file);
  	up_write(&oldmm->mmap_sem);

  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
719
720
  #define mm_alloc_pgd(mm)	(0)
  #define mm_free_pgd(mm)
  #endif /* CONFIG_MMU */
23ff44402   Daniel Walker   whitespace fixes:...
721
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
722

e94b17660   Christoph Lameter   [PATCH] slab: rem...
723
  #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724
  #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
4cb0e11b1   Hidehiro Kawai   coredump_filter: ...
725
726
727
728
729
730
731
732
733
734
735
  static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
  
  /*
   * Parse the numeric argument @s, shift it by MMF_DUMP_FILTER_SHIFT, mask
   * it with MMF_DUMP_FILTER_MASK and store the result in
   * default_dump_filter.  Always returns 1.
   */
  static int __init coredump_filter_setup(char *s)
  {
  	unsigned long filter = simple_strtoul(s, NULL, 0);

  	filter <<= MMF_DUMP_FILTER_SHIFT;
  	default_dump_filter = filter & MMF_DUMP_FILTER_MASK;

  	return 1;
  }
  
  __setup("coredump_filter=", coredump_filter_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
736
  #include <linux/init_task.h>
858f09930   Alexey Dobriyan   aio: ifdef fields...
737
738
739
740
  /*
   * Reset the AIO state of @mm: no ioctx table and a freshly initialised
   * ioctx_lock.  Compiles to an empty function without CONFIG_AIO.
   */
  static void mm_init_aio(struct mm_struct *mm)
  {
  #ifdef CONFIG_AIO
  	mm->ioctx_table = NULL;
  	spin_lock_init(&mm->ioctx_lock);
  #endif
  }
33144e842   Vladimir Davydov   kernel/fork.c: ma...
744
745
746
747
748
749
  /*
   * Record task @p in mm->owner.  The assignment is compiled in only when
   * CONFIG_MEMCG is set; otherwise this function does nothing.
   */
  static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  {
  #ifdef CONFIG_MEMCG
  	mm->owner = p;
  #endif
  }
355627f51   Eric Biggers   mm, uprobes: fix ...
750
751
752
753
754
755
  /*
   * Clear mm->uprobes_state.xol_area.  The assignment is compiled in only
   * when CONFIG_UPROBES is set; otherwise this function does nothing.
   */
  static void mm_init_uprobes_state(struct mm_struct *mm)
  {
  #ifdef CONFIG_UPROBES
  	mm->uprobes_state.xol_area = NULL;
  #endif
  }
bfedb5892   Eric W. Biederman   mm: Add a user_ns...
756
757
  static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
  	struct user_namespace *user_ns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
758
  {
41f727fde   Vladimir Davydov   fork/exec: cleanu...
759
760
761
  	mm->mmap = NULL;
  	mm->mm_rb = RB_ROOT;
  	mm->vmacache_seqnum = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
762
763
764
765
  	atomic_set(&mm->mm_users, 1);
  	atomic_set(&mm->mm_count, 1);
  	init_rwsem(&mm->mmap_sem);
  	INIT_LIST_HEAD(&mm->mmlist);
999d9fc16   Oleg Nesterov   coredump: move mm...
766
  	mm->core_state = NULL;
e1f56c89b   Kirill A. Shutemov   mm: convert mm->n...
767
  	atomic_long_set(&mm->nr_ptes, 0);
2d2f5119b   Kirill A. Shutemov   mm: do not use mm...
768
  	mm_nr_pmds_init(mm);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
769
770
  	mm->map_count = 0;
  	mm->locked_vm = 0;
ce65cefa5   Vladimir Davydov   fork: reset mm->p...
771
  	mm->pinned_vm = 0;
d559db086   KAMEZAWA Hiroyuki   mm: clean up mm_c...
772
  	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
773
  	spin_lock_init(&mm->page_table_lock);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
774
  	mm_init_cpumask(mm);
858f09930   Alexey Dobriyan   aio: ifdef fields...
775
  	mm_init_aio(mm);
cf475ad28   Balbir Singh   cgroups: add an o...
776
  	mm_init_owner(mm, p);
2b7e8665b   Eric Biggers   fork: fix incorre...
777
  	RCU_INIT_POINTER(mm->exe_file, NULL);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
778
  	mmu_notifier_mm_init(mm);
133ff0eac   Jérôme Glisse   mm/hmm: heterogen...
779
  	hmm_mm_init(mm);
16af97dc5   Nadav Amit   mm: migrate: prev...
780
  	init_tlb_flush_pending(mm);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
781
782
783
  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
  	mm->pmd_huge_pte = NULL;
  #endif
355627f51   Eric Biggers   mm, uprobes: fix ...
784
  	mm_init_uprobes_state(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
785

a0715cc22   Alex Thorlton   mm, thp: add VM_I...
786
787
788
789
790
  	if (current->mm) {
  		mm->flags = current->mm->flags & MMF_INIT_MASK;
  		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
  	} else {
  		mm->flags = default_dump_filter;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
791
  		mm->def_flags = 0;
a0715cc22   Alex Thorlton   mm, thp: add VM_I...
792
  	}
41f727fde   Vladimir Davydov   fork/exec: cleanu...
793
794
795
796
797
  	if (mm_alloc_pgd(mm))
  		goto fail_nopgd;
  
  	if (init_new_context(p, mm))
  		goto fail_nocontext;
78fb74669   Pavel Emelianov   Memory controller...
798

bfedb5892   Eric W. Biederman   mm: Add a user_ns...
799
  	mm->user_ns = get_user_ns(user_ns);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
800
801
802
803
804
  	return mm;
  
  fail_nocontext:
  	mm_free_pgd(mm);
  fail_nopgd:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
805
806
807
  	free_mm(mm);
  	return NULL;
  }
c3f0327f8   Konstantin Khlebnikov   mm: add rss count...
808
809
810
811
812
813
814
815
816
817
818
819
  /*
   * Sanity-check the counters of an mm that is about to be freed.
   *
   * Prints an alert for every non-zero entry of mm->rss_stat.count[], for a
   * non-zero nr_ptes and for a non-zero mm_nr_pmds().  On configurations
   * with CONFIG_TRANSPARENT_HUGEPAGE and without split PMD page table
   * locks, it additionally asserts that mm->pmd_huge_pte is NULL.
   */
  static void check_mm(struct mm_struct *mm)
  {
  	int i;

  	for (i = 0; i < NR_MM_COUNTERS; i++) {
  		long x = atomic_long_read(&mm->rss_stat.count[i]);

  		/* Each message ends with an escaped newline inside the literal. */
  		if (unlikely(x))
  			printk(KERN_ALERT "BUG: Bad rss-counter state "
  					  "mm:%p idx:%d val:%ld\n", mm, i, x);
  	}

  	if (atomic_long_read(&mm->nr_ptes))
  		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
  				atomic_long_read(&mm->nr_ptes));
  	if (mm_nr_pmds(mm))
  		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
  				mm_nr_pmds(mm));

  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
  	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833
834
835
  /*
   * Allocate and initialize an mm_struct.
   */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
836
  struct mm_struct *mm_alloc(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
837
  {
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
838
  	struct mm_struct *mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839
840
  
  	mm = allocate_mm();
de03c72cf   KOSAKI Motohiro   mm: convert mm->c...
841
842
843
844
  	if (!mm)
  		return NULL;
  
  	memset(mm, 0, sizeof(*mm));
bfedb5892   Eric W. Biederman   mm: Add a user_ns...
845
  	return mm_init(mm, current, current_user_ns());
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
846
847
848
849
850
851
852
  }
  
  /*
   * Called when the last reference to the mm
   * is dropped: either by a lazy thread or by
   * mmput. Free the page directory and the mm.
   */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
853
  void __mmdrop(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
854
855
856
857
  {
  	BUG_ON(mm == &init_mm);
  	mm_free_pgd(mm);
  	destroy_context(mm);
133ff0eac   Jérôme Glisse   mm/hmm: heterogen...
858
  	hmm_mm_destroy(mm);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
859
  	mmu_notifier_mm_destroy(mm);
c3f0327f8   Konstantin Khlebnikov   mm: add rss count...
860
  	check_mm(mm);
bfedb5892   Eric W. Biederman   mm: Add a user_ns...
861
  	put_user_ns(mm->user_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
862
863
  	free_mm(mm);
  }
6d4e4c4fc   Avi Kivity   KVM: Disallow for...
864
  EXPORT_SYMBOL_GPL(__mmdrop);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
865

ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
866
867
868
869
870
871
872
873
874
  static inline void __mmput(struct mm_struct *mm)
  {
  	VM_BUG_ON(atomic_read(&mm->mm_users));
  
  	uprobe_clear_state(mm);
  	exit_aio(mm);
  	ksm_exit(mm);
  	khugepaged_exit(mm); /* must run before exit_mmap */
  	exit_mmap(mm);
6fcb52a56   Aaron Lu   thp: reduce usage...
875
  	mm_put_huge_zero_page(mm);
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
876
877
878
879
880
881
882
883
884
885
  	set_mm_exe_file(mm, NULL);
  	if (!list_empty(&mm->mmlist)) {
  		spin_lock(&mmlist_lock);
  		list_del(&mm->mmlist);
  		spin_unlock(&mmlist_lock);
  	}
  	if (mm->binfmt)
  		module_put(mm->binfmt->module);
  	mmdrop(mm);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
886
887
888
889
890
  /*
   * Decrement the use count and release all resources for an mm.
   */
  void mmput(struct mm_struct *mm)
  {
0ae26f1b3   Andrew Morton   [PATCH] mmput() m...
891
  	might_sleep();
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
892
893
894
895
  	if (atomic_dec_and_test(&mm->mm_users))
  		__mmput(mm);
  }
  EXPORT_SYMBOL_GPL(mmput);
a1b2289ce   Sherry Yang   android: binder: ...
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
  #ifdef CONFIG_MMU
  /*
   * Work callback queued by mmput_async(): recover the mm that embeds @work
   * as its async_put_work member and run __mmput() on it.
   */
  static void mmput_async_fn(struct work_struct *work)
  {
  	struct mm_struct *mm;

  	mm = container_of(work, struct mm_struct, async_put_work);
  	__mmput(mm);
  }
  
  /*
   * Like mmput(), but when the last user goes away the teardown is deferred
   * to a work item (mmput_async_fn) instead of running in the caller.
   */
  void mmput_async(struct mm_struct *mm)
  {
  	/* Other users remain: nothing to tear down yet. */
  	if (!atomic_dec_and_test(&mm->mm_users))
  		return;

  	INIT_WORK(&mm->async_put_work, mmput_async_fn);
  	schedule_work(&mm->async_put_work);
  }
  #endif
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
913
914
915
916
917
  /**
   * set_mm_exe_file - change a reference to the mm's executable file
   *
   * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
   *
6e399cd14   Davidlohr Bueso   prctl: avoid usin...
918
919
920
921
922
   * Main users are mmput() and sys_execve(). Callers prevent concurrent
   * invocations: in mmput() nobody alive left, in execve task is single
   * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
   * mm->exe_file, but does so without using set_mm_exe_file() in order
   * to avoid the need for any locks.
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
923
   */
386460138   Jiri Slaby   mm: extract exe_f...
924
925
  void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  {
  	/*
  	 * The exe_file can be dereferenced without RCU protection here:
  	 * this function is only called when nobody else can access this
  	 * mm -- see the comment above this function for the justification.
  	 */
  	struct file *prev_exe_file = rcu_dereference_raw(mm->exe_file);

  	/* Take the new reference before publishing the pointer. */
  	if (new_exe_file)
  		get_file(new_exe_file);
  	rcu_assign_pointer(mm->exe_file, new_exe_file);

  	/* Drop the reference that was held on the previous file. */
  	if (prev_exe_file)
  		fput(prev_exe_file);
  }
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
941
942
943
944
945
946
  /**
   * get_mm_exe_file - acquire a reference to the mm's executable file
   *
   * Returns %NULL if mm has no associated executable file.
   * User must release file via fput().
   */
386460138   Jiri Slaby   mm: extract exe_f...
947
948
949
  struct file *get_mm_exe_file(struct mm_struct *mm)
  {
  	struct file *file;

  	rcu_read_lock();
  	file = rcu_dereference(mm->exe_file);
  	if (file) {
  		/* Report "no file" if the reference could not be taken. */
  		if (!get_file_rcu(file))
  			file = NULL;
  	}
  	rcu_read_unlock();

  	return file;
  }
11163348a   Davidlohr Bueso   oprofile: reduce ...
957
  EXPORT_SYMBOL(get_mm_exe_file);
386460138   Jiri Slaby   mm: extract exe_f...
958

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959
  /**
cd81a9170   Mateusz Guzik   mm: introduce get...
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
   * get_task_exe_file - acquire a reference to the task's executable file
   *
   * Returns %NULL if task's mm (if any) has no associated executable file or
   * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
   * User must release file via fput().
   */
  struct file *get_task_exe_file(struct task_struct *task)
  {
  	struct file *file = NULL;
  	struct mm_struct *mm;

  	task_lock(task);
  	mm = task->mm;
  	/* Only tasks that have an mm and are not PF_KTHREAD have one. */
  	if (mm && !(task->flags & PF_KTHREAD))
  		file = get_mm_exe_file(mm);
  	task_unlock(task);

  	return file;
  }
  EXPORT_SYMBOL(get_task_exe_file);
386460138   Jiri Slaby   mm: extract exe_f...
981

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
982
983
984
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
985
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986
987
988
989
990
991
992
993
994
995
996
997
   * this kernel workthread has transiently adopted a user mm with use_mm,
   * to do its AIO) is not set and if so returns a reference to it, after
   * bumping up the use count.  User must release the mm via mmput()
   * after use.  Typically used by /proc and ptrace.
   */
  struct mm_struct *get_task_mm(struct task_struct *task)
  {
  	struct mm_struct *mm;
  
  	task_lock(task);
  	mm = task->mm;
  	if (mm) {
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
998
  		if (task->flags & PF_KTHREAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
999
1000
  			mm = NULL;
  		else
3fce371bf   Vegard Nossum   mm: add new mmget...
1001
  			mmget(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
1003
1004
1005
1006
  	}
  	task_unlock(task);
  	return mm;
  }
  EXPORT_SYMBOL_GPL(get_task_mm);
8cdb878dc   Christopher Yeoh   Fix race in proce...
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
  /*
   * Get a reference to @task's mm after a ptrace-style access check.
   *
   * Holds task->signal->cred_guard_mutex across the lookup and the check.
   * Returns the mm (to be released with mmput()), NULL if get_task_mm()
   * returned none, ERR_PTR(-EACCES) if the mm is not the caller's own and
   * ptrace_may_access() denies @mode, or ERR_PTR() of the error returned by
   * mutex_lock_killable().
   */
  struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  {
  	struct mm_struct *mm;
  	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);

  	if (err)
  		return ERR_PTR(err);

  	mm = get_task_mm(task);
  	/* The caller's own mm is handed out without a permission check. */
  	if (mm && mm != current->mm) {
  		if (!ptrace_may_access(task, mode)) {
  			mmput(mm);
  			mm = ERR_PTR(-EACCES);
  		}
  	}
  	mutex_unlock(&task->signal->cred_guard_mutex);

  	return mm;
  }
57b59c4a1   Oleg Nesterov   coredump_wait: do...
1026
  static void complete_vfork_done(struct task_struct *tsk)
c415c3b47   Oleg Nesterov   vfork: introduce ...
1027
  {
d68b46fe1   Oleg Nesterov   vfork: make it ki...
1028
  	struct completion *vfork;
c415c3b47   Oleg Nesterov   vfork: introduce ...
1029

d68b46fe1   Oleg Nesterov   vfork: make it ki...
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
  	task_lock(tsk);
  	vfork = tsk->vfork_done;
  	if (likely(vfork)) {
  		tsk->vfork_done = NULL;
  		complete(vfork);
  	}
  	task_unlock(tsk);
  }
  
  /*
   * Wait (killably) for @vfork to be completed on behalf of @child, then
   * drop a reference on @child.
   *
   * Returns the result of wait_for_completion_killable(); when that is
   * non-zero, @child's vfork_done pointer is cleared under task_lock().
   */
  static int wait_for_vfork_done(struct task_struct *child,
  				struct completion *vfork)
  {
  	int ret;

  	freezer_do_not_count();
  	ret = wait_for_completion_killable(vfork);
  	freezer_count();

  	if (ret) {
  		/* The wait was cut short: detach the completion from child. */
  		task_lock(child);
  		child->vfork_done = NULL;
  		task_unlock(child);
  	}

  	put_task_struct(child);

  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
  /* Please note the differences between mmput and mm_release.
   * mmput is called whenever we stop holding onto a mm_struct,
   * error success whatever.
   *
   * mm_release is called after a mm_struct has been removed
   * from the current process.
   *
   * This difference is important for error handling, when we
   * only half set up a mm_struct for a new process and need to restore
   * the old one.  Because we mmput the new mm_struct before
   * restoring the old one. . .
   * Eric Biederman 10 January 1998
   */
  void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  {
8141c7f3e   Linus Torvalds   Move "exit_robust...
1072
1073
  	/* Get rid of any futexes when releasing the mm */
  #ifdef CONFIG_FUTEX
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1074
  	if (unlikely(tsk->robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
1075
  		exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1076
1077
  		tsk->robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
1078
  #ifdef CONFIG_COMPAT
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1079
  	if (unlikely(tsk->compat_robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
1080
  		compat_exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1081
1082
  		tsk->compat_robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
1083
  #endif
322a2c100   Thomas Gleixner   futex: Move exit_...
1084
1085
  	if (unlikely(!list_empty(&tsk->pi_state_list)))
  		exit_pi_state_list(tsk);
8141c7f3e   Linus Torvalds   Move "exit_robust...
1086
  #endif
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
1087
  	uprobe_free_utask(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1088
1089
  	/* Get rid of any cached register state */
  	deactivate_mm(tsk, mm);
fec1d0115   Roland McGrath   [PATCH] Disable C...
1090
  	/*
735f2770a   Michal Hocko   kernel/fork: fix ...
1091
1092
1093
  	 * Signal userspace if we're not exiting with a core dump
  	 * because we want to leave the value intact for debugging
  	 * purposes.
fec1d0115   Roland McGrath   [PATCH] Disable C...
1094
  	 */
9c8a8228d   Eric Dumazet   execve: must clea...
1095
  	if (tsk->clear_child_tid) {
735f2770a   Michal Hocko   kernel/fork: fix ...
1096
  		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
9c8a8228d   Eric Dumazet   execve: must clea...
1097
1098
1099
1100
1101
1102
1103
1104
1105
  		    atomic_read(&mm->mm_users) > 1) {
  			/*
  			 * We don't check the error code - if userspace has
  			 * not set up a proper pointer then tough luck.
  			 */
  			put_user(0, tsk->clear_child_tid);
  			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
  					1, NULL, NULL, 0);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1106
  		tsk->clear_child_tid = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1107
  	}
f7505d64f   Konstantin Khlebnikov   fork: call comple...
1108
1109
1110
1111
1112
1113
1114
  
  	/*
  	 * All done, finally we can wake up parent and return this mm to him.
  	 * Also kthread_stop() uses this completion for synchronization.
  	 */
  	if (tsk->vfork_done)
  		complete_vfork_done(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1115
  }
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1116
1117
1118
1119
  /*
   * Allocate a new mm structure and copy contents from the
   * mm structure of the passed in task structure.
   */
ff252c1fc   DaeSeok Youn   kernel/fork.c: ma...
1120
  static struct mm_struct *dup_mm(struct task_struct *tsk)
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1121
1122
1123
  {
  	struct mm_struct *mm, *oldmm = current->mm;
  	int err;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1124
1125
1126
1127
1128
  	mm = allocate_mm();
  	if (!mm)
  		goto fail_nomem;
  
  	memcpy(mm, oldmm, sizeof(*mm));
bfedb5892   Eric W. Biederman   mm: Add a user_ns...
1129
  	if (!mm_init(mm, tsk, mm->user_ns))
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1130
  		goto fail_nomem;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1131
1132
1133
1134
1135
1136
  	err = dup_mmap(mm, oldmm);
  	if (err)
  		goto free_pt;
  
  	mm->hiwater_rss = get_mm_rss(mm);
  	mm->hiwater_vm = mm->total_vm;
801460d0c   Hiroshi Shimamoto   task_struct clean...
1137
1138
  	if (mm->binfmt && !try_module_get(mm->binfmt->module))
  		goto free_pt;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1139
1140
1141
  	return mm;
  
  free_pt:
801460d0c   Hiroshi Shimamoto   task_struct clean...
1142
1143
  	/* don't put binfmt in mmput, we haven't got module yet */
  	mm->binfmt = NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1144
1145
1146
1147
  	mmput(mm);
  
  fail_nomem:
  	return NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1148
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1149
  static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1150
  {
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1151
  	struct mm_struct *mm, *oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152
1153
1154
1155
  	int retval;
  
  	tsk->min_flt = tsk->maj_flt = 0;
  	tsk->nvcsw = tsk->nivcsw = 0;
17406b82d   Mandeep Singh Baines   softlockup: remov...
1156
1157
1158
  #ifdef CONFIG_DETECT_HUNG_TASK
  	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
  
  	tsk->mm = NULL;
  	tsk->active_mm = NULL;
  
  	/*
  	 * Are we cloning a kernel thread?
  	 *
  	 * We need to steal a active VM for that..
  	 */
  	oldmm = current->mm;
  	if (!oldmm)
  		return 0;
615d6e875   Davidlohr Bueso   mm: per-thread vm...
1171
1172
  	/* initialize the new vmacache entries */
  	vmacache_flush(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1173
  	if (clone_flags & CLONE_VM) {
3fce371bf   Vegard Nossum   mm: add new mmget...
1174
  		mmget(oldmm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1175
  		mm = oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1176
1177
1178
1179
  		goto good_mm;
  	}
  
  	retval = -ENOMEM;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1180
  	mm = dup_mm(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1181
1182
  	if (!mm)
  		goto fail_nomem;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1183
1184
1185
1186
  good_mm:
  	tsk->mm = mm;
  	tsk->active_mm = mm;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1187
1188
  fail_nomem:
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1190
  static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1191
  {
498052bba   Al Viro   New locking/refco...
1192
  	struct fs_struct *fs = current->fs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1193
  	if (clone_flags & CLONE_FS) {
498052bba   Al Viro   New locking/refco...
1194
  		/* tsk->fs is already what we want */
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1195
  		spin_lock(&fs->lock);
498052bba   Al Viro   New locking/refco...
1196
  		if (fs->in_exec) {
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1197
  			spin_unlock(&fs->lock);
498052bba   Al Viro   New locking/refco...
1198
1199
1200
  			return -EAGAIN;
  		}
  		fs->users++;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1201
  		spin_unlock(&fs->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1202
1203
  		return 0;
  	}
498052bba   Al Viro   New locking/refco...
1204
  	tsk->fs = copy_fs_struct(fs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1205
1206
1207
1208
  	if (!tsk->fs)
  		return -ENOMEM;
  	return 0;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1209
  static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
a016f3389   JANAK DESAI   [PATCH] unshare s...
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
  {
  	struct files_struct *oldf, *newf;
  	int error = 0;
  
  	/*
  	 * A background process may not have any files ...
  	 */
  	oldf = current->files;
  	if (!oldf)
  		goto out;
  
  	if (clone_flags & CLONE_FILES) {
  		atomic_inc(&oldf->count);
  		goto out;
  	}
a016f3389   JANAK DESAI   [PATCH] unshare s...
1225
1226
1227
1228
1229
1230
1231
1232
1233
  	newf = dup_fd(oldf, &error);
  	if (!newf)
  		goto out;
  
  	tsk->files = newf;
  	error = 0;
  out:
  	return error;
  }
fadad878c   Jens Axboe   kernel: add CLONE...
1234
  /*
   * Set up the I/O context of a new task (only when CONFIG_BLOCK is set).
   *
   * @clone_flags: clone flags; CLONE_IO requests sharing with the parent.
   * @tsk:         the task being created.
   *
   * Returns 0 on success or -ENOMEM when get_task_io_context() fails.
   */
  static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
  {
  #ifdef CONFIG_BLOCK
  	struct io_context *parent_ioc = current->io_context;
  
  	if (!parent_ioc)
  		return 0;
  
  	/* Share the io context with the parent if CLONE_IO is set. */
  	if (clone_flags & CLONE_IO) {
  		ioc_task_link(parent_ioc);
  		tsk->io_context = parent_ioc;
  		return 0;
  	}
  
  	/* Otherwise carry over only a valid I/O priority. */
  	if (ioprio_valid(parent_ioc->ioprio)) {
  		struct io_context *child_ioc;
  
  		child_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
  		if (unlikely(!child_ioc))
  			return -ENOMEM;
  
  		child_ioc->ioprio = parent_ioc->ioprio;
  		put_io_context(child_ioc);
  	}
  #endif
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1258
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1259
1260
  {
  	struct sighand_struct *sig;
60348802e   Zhaolei   fork.c: cleanup f...
1261
  	if (clone_flags & CLONE_SIGHAND) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1262
1263
1264
1265
  		atomic_inc(&current->sighand->count);
  		return 0;
  	}
  	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
e56d09031   Ingo Molnar   [PATCH] RCU signa...
1266
  	rcu_assign_pointer(tsk->sighand, sig);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267
1268
  	if (!sig)
  		return -ENOMEM;
9d7fb0427   Peter Zijlstra   sched/cputime: Gu...
1269

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1270
  	atomic_set(&sig->count, 1);
f552f8c28   Jann Horn   fork: don't copy ...
1271
  	spin_lock_irq(&current->sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1272
  	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
f552f8c28   Jann Horn   fork: don't copy ...
1273
  	spin_unlock_irq(&current->sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1274
1275
  	return 0;
  }
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1276
  void __cleanup_sighand(struct sighand_struct *sighand)
c81addc9d   Oleg Nesterov   [PATCH] rename __...
1277
  {
d80e731ec   Oleg Nesterov   epoll: introduce ...
1278
1279
  	if (atomic_dec_and_test(&sighand->count)) {
  		signalfd_cleanup(sighand);
392809b25   Oleg Nesterov   signal: Document ...
1280
  		/*
5f0d5a3ae   Paul E. McKenney   mm: Rename SLAB_D...
1281
  		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
392809b25   Oleg Nesterov   signal: Document ...
1282
1283
  		 * without an RCU grace period, see __lock_task_sighand().
  		 */
c81addc9d   Oleg Nesterov   [PATCH] rename __...
1284
  		kmem_cache_free(sighand_cachep, sighand);
d80e731ec   Oleg Nesterov   epoll: introduce ...
1285
  	}
c81addc9d   Oleg Nesterov   [PATCH] rename __...
1286
  }
b18b6a9ce   Nicolas Pitre   timers: Omit POSI...
1287
  #ifdef CONFIG_POSIX_TIMERS
  /*
   * Initialize POSIX CPU-timer handling for a thread group.
   *
   * @sig: signal_struct of the new group. Its RLIMIT_CPU soft limit is
   *       read from sig->rlim, so the limits must be in place already.
   *
   * When the soft limit is not RLIM_INFINITY it is multiplied by
   * NSEC_PER_SEC and stored in cputime_expires.prof_exp, and the group
   * cputimer is marked running. The three cpu_timers lists are always
   * initialised empty.
   */
  static void posix_cpu_timers_init_group(struct signal_struct *sig)
  {
  	unsigned long cpu_limit;
  
  	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
  	if (cpu_limit != RLIM_INFINITY) {
  		/*
  		 * Widen before multiplying: in unsigned long arithmetic the
  		 * product wraps on 32-bit for any limit above 4 seconds.
  		 * NOTE(review): assumes prof_exp is a 64-bit field - confirm.
  		 */
  		sig->cputime_expires.prof_exp = (u64)cpu_limit * NSEC_PER_SEC;
  		sig->cputimer.running = true;
  	}
  
  	/* The timer lists. */
  	INIT_LIST_HEAD(&sig->cpu_timers[0]);
  	INIT_LIST_HEAD(&sig->cpu_timers[1]);
  	INIT_LIST_HEAD(&sig->cpu_timers[2]);
  }
  #else
  /* Without CONFIG_POSIX_TIMERS there is nothing to initialise. */
  static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { }
  #endif
f06febc96   Frank Mayhar   timers: fix itime...
1307

a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1308
  static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1309
1310
  {
  	struct signal_struct *sig;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1311

4ab6c0833   Oleg Nesterov   clone(): fix race...
1312
  	if (clone_flags & CLONE_THREAD)
490dea45d   Peter Zijlstra   itimers: remove t...
1313
  		return 0;
490dea45d   Peter Zijlstra   itimers: remove t...
1314

a56704ef6   Veaceslav Falico   copy_signal() cle...
1315
  	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1316
1317
1318
  	tsk->signal = sig;
  	if (!sig)
  		return -ENOMEM;
b3ac022cb   Oleg Nesterov   proc: turn signal...
1319
  	sig->nr_threads = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1320
  	atomic_set(&sig->live, 1);
b3ac022cb   Oleg Nesterov   proc: turn signal...
1321
  	atomic_set(&sig->sigcnt, 1);
0c740d0af   Oleg Nesterov   introduce for_eac...
1322
1323
1324
1325
  
  	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
  	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
  	tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1326
  	init_waitqueue_head(&sig->wait_chldexit);
db51aeccd   Oleg Nesterov   signals: microopt...
1327
  	sig->curr_target = tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1328
  	init_sigpending(&sig->shared_pending);
e78c34967   Rik van Riel   time, signal: Pro...
1329
  	seqlock_init(&sig->stats_lock);
9d7fb0427   Peter Zijlstra   sched/cputime: Gu...
1330
  	prev_cputime_init(&sig->prev_cputime);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1331

baa73d9e4   Nicolas Pitre   posix-timers: Mak...
1332
  #ifdef CONFIG_POSIX_TIMERS
b18b6a9ce   Nicolas Pitre   timers: Omit POSI...
1333
  	INIT_LIST_HEAD(&sig->posix_timers);
c9cb2e3d7   Thomas Gleixner   [PATCH] hrtimers:...
1334
  	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1335
  	sig->real_timer.function = it_real_fn;
baa73d9e4   Nicolas Pitre   posix-timers: Mak...
1336
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1337

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1338
1339
1340
  	task_lock(current->group_leader);
  	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
  	task_unlock(current->group_leader);
6279a751f   Oleg Nesterov   posix-timers: fix...
1341
  	posix_cpu_timers_init_group(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
1342
  	tty_audit_fork(sig);
5091faa44   Mike Galbraith   sched: Add 'autog...
1343
  	sched_autogroup_fork(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
1344

a63d83f42   David Rientjes   oom: badness heur...
1345
  	sig->oom_score_adj = current->signal->oom_score_adj;
dabb16f63   Mandeep Singh Baines   oom: allow a non-...
1346
  	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
1347

9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1348
  	mutex_init(&sig->cred_guard_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1349
1350
  	return 0;
  }
dbd952127   Kees Cook   seccomp: introduc...
1351
1352
1353
1354
1355
1356
1357
1358
1359
  /*
   * Copy the parent's seccomp state into the new task @p (only when
   * CONFIG_SECCOMP is set). The caller must hold current->sighand->siglock.
   */
  static void copy_seccomp(struct task_struct *p)
  {
  #ifdef CONFIG_SECCOMP
  	struct task_struct *parent = current;
  
  	/*
  	 * The siglock is common to all threads in the group, so it must be
  	 * held here. cred_guard_mutex is not needed because the new task
  	 * is not yet running and cannot be racing exec.
  	 */
  	assert_spin_locked(&parent->sighand->siglock);
  
  	/* Take a reference for the new filter user, then copy the state. */
  	get_seccomp_filter(parent);
  	p->seccomp = parent->seccomp;
  
  	/*
  	 * no_new_privs may have been set on the parent between the
  	 * task_struct being duplicated and the sighand lock being taken.
  	 * Set it explicitly so the seccomp state and nnp stay in sync.
  	 */
  	if (task_no_new_privs(parent))
  		task_set_no_new_privs(p);
  
  	/*
  	 * Likewise, if the parent gained a seccomp mode after the thread
  	 * flags were copied but before the sighand lock was held, the
  	 * seccomp thread flag has to be enabled manually.
  	 */
  	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
  		set_tsk_thread_flag(p, TIF_SECCOMP);
  #endif
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
1383
  /*
   * set_tid_address(2): record @tidptr as the calling task's
   * clear_child_tid pointer and return the result of task_pid_vnr()
   * for the caller.
   */
  SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
  {
  	struct task_struct *self = current;
  
  	self->clear_child_tid = tidptr;
  
  	return task_pid_vnr(self);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1388
  static void rt_mutex_init_task(struct task_struct *p)
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1389
  {
1d6154825   Thomas Gleixner   sched: Convert pi...
1390
  	raw_spin_lock_init(&p->pi_lock);
e29e175b0   Zilvinas Valinskas   [PATCH] initialis...
1391
  #ifdef CONFIG_RT_MUTEXES
a23ba907d   Davidlohr Bueso   locking/rtmutex: ...
1392
  	p->pi_waiters = RB_ROOT_CACHED;
e96a7705e   Xunlei Pang   sched/rtmutex/dea...
1393
  	p->pi_top_task = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1394
  	p->pi_blocked_on = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1395
1396
  #endif
  }
b18b6a9ce   Nicolas Pitre   timers: Omit POSI...
1397
  #ifdef CONFIG_POSIX_TIMERS
  /*
   * Initialize POSIX CPU-timer handling for a single task: zero the three
   * cached expiry values and start with three empty cpu_timers lists.
   */
  static void posix_cpu_timers_init(struct task_struct *tsk)
  {
  	struct list_head *timers = tsk->cpu_timers;
  
  	/* Zero all three cached expiry values. */
  	tsk->cputime_expires.prof_exp = 0;
  	tsk->cputime_expires.virt_exp = 0;
  	tsk->cputime_expires.sched_exp = 0;
  
  	/* The timer lists. */
  	INIT_LIST_HEAD(&timers[0]);
  	INIT_LIST_HEAD(&timers[1]);
  	INIT_LIST_HEAD(&timers[2]);
  }
  #else
  /* Without CONFIG_POSIX_TIMERS there is nothing to initialise. */
  static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
  #endif
f06febc96   Frank Mayhar   timers: fix itime...
1413

819077398   Oleg Nesterov   kernel/fork.c:cop...
1414
1415
1416
1417
1418
  /* Record @pid as the struct pid of kind @type in @task's pids[] array. */
  static inline void init_task_pid(struct task_struct *task, enum pid_type type,
  				 struct pid *pid)
  {
  	task->pids[type].pid = pid;
  }
6bfbaa51e   Ingo Molnar   sched/headers, RC...
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
  /*
   * Reset the RCU bookkeeping fields of a new task @p. Each group of
   * fields is touched only when its config option is set.
   */
  static inline void rcu_copy_process(struct task_struct *p)
  {
  #ifdef CONFIG_PREEMPT_RCU
  	/* Reset read-side nesting, special state and blocked-node link. */
  	INIT_LIST_HEAD(&p->rcu_node_entry);
  	p->rcu_blocked_node = NULL;
  	p->rcu_read_unlock_special.s = 0;
  	p->rcu_read_lock_nesting = 0;
  #endif /* #ifdef CONFIG_PREEMPT_RCU */
  #ifdef CONFIG_TASKS_RCU
  	/* Not a holdout; empty holdout list; idle CPU set to -1. */
  	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
  	p->rcu_tasks_holdout = false;
  	p->rcu_tasks_idle_cpu = -1;
  #endif /* #ifdef CONFIG_TASKS_RCU */
  }
f06febc96   Frank Mayhar   timers: fix itime...
1433
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1434
1435
1436
1437
1438
1439
1440
   * This creates a new process as a copy of the old one,
   * but does not actually start it yet.
   *
   * It copies the registers, and all the appropriate
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
0766f788e   Emese Revfy   latent_entropy: M...
1441
1442
  static __latent_entropy struct task_struct *copy_process(
  					unsigned long clone_flags,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1443
  					unsigned long stack_start,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1444
  					unsigned long stack_size,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1445
  					int __user *child_tidptr,
09a05394f   Roland McGrath   tracehook: clone
1446
  					struct pid *pid,
3033f14ab   Josh Triplett   clone: support pa...
1447
  					int trace,
725fc629f   Andi Kleen   kernek/fork.c: al...
1448
1449
  					unsigned long tls,
  					int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1450
1451
  {
  	int retval;
a24efe62d   Mariusz Kozlowski   kernel/fork.c: re...
1452
  	struct task_struct *p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1453
1454
1455
  
  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  		return ERR_PTR(-EINVAL);
e66eded83   Eric W. Biederman   userns: Don't all...
1456
1457
  	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
  		return ERR_PTR(-EINVAL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
  	/*
  	 * Thread groups must share signals as well, and detached threads
  	 * can only be started up within the thread group.
  	 */
  	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Shared signal handlers imply shared VM. By way of the above,
  	 * thread groups also imply shared VM. Blocking this case allows
  	 * for various simplifications in other code.
  	 */
  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
  		return ERR_PTR(-EINVAL);
123be07b0   Sukadev Bhattiprolu   fork(): disable C...
1472
1473
1474
1475
1476
1477
1478
1479
1480
  	/*
  	 * Siblings of global init remain as zombies on exit since they are
  	 * not reaped by their parent (swapper). To solve this and to avoid
  	 * multi-rooted process trees, prevent global and container-inits
  	 * from creating siblings.
  	 */
  	if ((clone_flags & CLONE_PARENT) &&
  				current->signal->flags & SIGNAL_UNKILLABLE)
  		return ERR_PTR(-EINVAL);
8382fcac1   Eric W. Biederman   pidns: Outlaw thr...
1481
  	/*
40a0d32d1   Oleg Nesterov   fork: unify and t...
1482
  	 * If the new process will be in a different pid or user namespace
faf00da54   Eric W. Biederman   userns,pidns: For...
1483
  	 * do not allow it to share a thread group with the forking task.
8382fcac1   Eric W. Biederman   pidns: Outlaw thr...
1484
  	 */
faf00da54   Eric W. Biederman   userns,pidns: For...
1485
  	if (clone_flags & CLONE_THREAD) {
40a0d32d1   Oleg Nesterov   fork: unify and t...
1486
1487
1488
1489
1490
  		if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
  		    (task_active_pid_ns(current) !=
  				current->nsproxy->pid_ns_for_children))
  			return ERR_PTR(-EINVAL);
  	}
8382fcac1   Eric W. Biederman   pidns: Outlaw thr...
1491

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1492
  	retval = -ENOMEM;
725fc629f   Andi Kleen   kernek/fork.c: al...
1493
  	p = dup_task_struct(current, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1494
1495
  	if (!p)
  		goto fork_out;
4d6501dce   Vegard Nossum   kthread: Fix use-...
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
  	/*
  	 * This _must_ happen before we call free_task(), i.e. before we jump
  	 * to any of the bad_fork_* labels. This is to avoid freeing
  	 * p->set_child_tid which is (ab)used as a kthread's data pointer for
  	 * kernel threads (PF_KTHREAD).
  	 */
  	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
  	/*
  	 * Clear TID on mm_release()?
  	 */
  	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
f7e8b616e   Steven Rostedt   function-graph: m...
1507
  	ftrace_graph_init_task(p);
bea493a03   Peter Zijlstra   [PATCH] rt-mutex:...
1508
  	rt_mutex_init_task(p);
d12c1a379   Ingo Molnar   lockdep: fix kern...
1509
  #ifdef CONFIG_PROVE_LOCKING
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1510
1511
1512
  	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
  	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1513
  	retval = -EAGAIN;
3b11a1dec   David Howells   CRED: Differentia...
1514
  	if (atomic_read(&p->real_cred->user->processes) >=
78d7d407b   Jiri Slaby   kernel core: use ...
1515
  			task_rlimit(p, RLIMIT_NPROC)) {
b57922b6c   Eric Paris   fork: reorder per...
1516
1517
  		if (p->real_cred->user != INIT_USER &&
  		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1518
1519
  			goto bad_fork_free;
  	}
72fa59970   Vasiliy Kulikov   move RLIMIT_NPROC...
1520
  	current->flags &= ~PF_NPROC_EXCEEDED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1521

f1752eec6   David Howells   CRED: Detach the ...
1522
1523
1524
  	retval = copy_creds(p, clone_flags);
  	if (retval < 0)
  		goto bad_fork_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1525
1526
1527
1528
1529
1530
  
  	/*
  	 * If multiple threads are within copy_process(), then this check
  	 * triggers too late. This doesn't hurt, the check is only there
  	 * to stop root fork bombs.
  	 */
04ec93fe9   Li Zefan   fork.c: fix NULL ...
1531
  	retval = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1532
1533
  	if (nr_threads >= max_threads)
  		goto bad_fork_cleanup_count;
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
1534
  	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
c1de45ca8   Peter Zijlstra   sched/idle: Add s...
1535
  	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
514ddb446   David Rientjes   fork: collapse co...
1536
  	p->flags |= PF_FORKNOEXEC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1537
1538
  	INIT_LIST_HEAD(&p->children);
  	INIT_LIST_HEAD(&p->sibling);
f41d911f8   Paul E. McKenney   rcu: Merge preemp...
1539
  	rcu_copy_process(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1540
1541
  	p->vfork_done = NULL;
  	spin_lock_init(&p->alloc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1542

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1543
  	init_sigpending(&p->pending);
648616343   Martin Schwidefsky   [S390] cputime: a...
1544
  	p->utime = p->stime = p->gtime = 0;
40565b5ae   Stanislaw Gruszka   sched/cputime, po...
1545
  #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
648616343   Martin Schwidefsky   [S390] cputime: a...
1546
  	p->utimescaled = p->stimescaled = 0;
40565b5ae   Stanislaw Gruszka   sched/cputime, po...
1547
  #endif
9d7fb0427   Peter Zijlstra   sched/cputime: Gu...
1548
  	prev_cputime_init(&p->prev_cputime);
6a61671bb   Frederic Weisbecker   cputime: Safely r...
1549
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
bac5b6b6b   Frederic Weisbecker   sched/cputime: Mo...
1550
1551
1552
  	seqcount_init(&p->vtime.seqcount);
  	p->vtime.starttime = 0;
  	p->vtime.state = VTIME_INACTIVE;
6a61671bb   Frederic Weisbecker   cputime: Safely r...
1553
  #endif
a3a2e76c7   KAMEZAWA Hiroyuki   mm: avoid null-po...
1554
1555
1556
  #if defined(SPLIT_RSS_COUNTING)
  	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
  #endif
172ba844a   Balbir Singh   sched: update del...
1557

6976675d9   Arjan van de Ven   hrtimer: create a...
1558
  	p->default_timer_slack_ns = current->timer_slack_ns;
5995477ab   Andrea Righi   task IO accountin...
1559
  	task_io_accounting_init(&p->ioac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1560
  	acct_clear_integrals(p);
f06febc96   Frank Mayhar   timers: fix itime...
1561
  	posix_cpu_timers_init(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1562

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1563
  	p->io_context = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1564
  	p->audit_context = NULL;
b4f48b636   Paul Menage   Task Control Grou...
1565
  	cgroup_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1566
  #ifdef CONFIG_NUMA
846a16bf0   Lee Schermerhorn   mempolicy: rename...
1567
  	p->mempolicy = mpol_dup(p->mempolicy);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1568
1569
1570
  	if (IS_ERR(p->mempolicy)) {
  		retval = PTR_ERR(p->mempolicy);
  		p->mempolicy = NULL;
e8604cb43   Li Zefan   cgroup: fix spuri...
1571
  		goto bad_fork_cleanup_threadgroup_lock;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1572
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1573
  #endif
778d3b0ff   Michal Hocko   cpusets: randomiz...
1574
1575
1576
  #ifdef CONFIG_CPUSETS
  	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
  	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1577
  	seqcount_init(&p->mems_allowed_seq);
778d3b0ff   Michal Hocko   cpusets: randomiz...
1578
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
  #ifdef CONFIG_TRACE_IRQFLAGS
  	p->irq_events = 0;
  	p->hardirqs_enabled = 0;
  	p->hardirq_enable_ip = 0;
  	p->hardirq_enable_event = 0;
  	p->hardirq_disable_ip = _THIS_IP_;
  	p->hardirq_disable_event = 0;
  	p->softirqs_enabled = 1;
  	p->softirq_enable_ip = _THIS_IP_;
  	p->softirq_enable_event = 0;
  	p->softirq_disable_ip = 0;
  	p->softirq_disable_event = 0;
  	p->hardirq_context = 0;
  	p->softirq_context = 0;
  #endif
8bcbde548   David Hildenbrand   sched/preempt, mm...
1594
1595
  
  	p->pagefault_disabled = 0;
fbb9ce953   Ingo Molnar   [PATCH] lockdep: ...
1596
1597
1598
1599
  #ifdef CONFIG_LOCKDEP
  	p->lockdep_depth = 0; /* no locks held yet */
  	p->curr_chain_key = 0;
  	p->lockdep_recursion = 0;
b09be676e   Byungchul Park   locking/lockdep: ...
1600
  	lockdep_init_task(p);
fbb9ce953   Ingo Molnar   [PATCH] lockdep: ...
1601
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1602

408894ee4   Ingo Molnar   [PATCH] mutex sub...
1603
1604
1605
  #ifdef CONFIG_DEBUG_MUTEXES
  	p->blocked_on = NULL; /* not blocked yet */
  #endif
cafe56359   Kent Overstreet   bcache: A block l...
1606
1607
1608
1609
  #ifdef CONFIG_BCACHE
  	p->sequential_io	= 0;
  	p->sequential_io_avg	= 0;
  #endif
0f4814065   Markus Metzger   x86, ptrace: add ...
1610

3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1611
  	/* Perform scheduler related setup. Assign this task to a CPU. */
aab03e05e   Dario Faggioli   sched/deadline: A...
1612
1613
1614
  	retval = sched_fork(clone_flags, p);
  	if (retval)
  		goto bad_fork_cleanup_policy;
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1615

cdd6c482c   Ingo Molnar   perf: Do the big ...
1616
  	retval = perf_event_init_task(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1617
1618
  	if (retval)
  		goto bad_fork_cleanup_policy;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1619
1620
  	retval = audit_alloc(p);
  	if (retval)
6c72e3501   Peter Zijlstra   perf: fix perf bu...
1621
  		goto bad_fork_cleanup_perf;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1622
  	/* copy all the process information */
ab602f799   Jack Miller   shm: make exit_sh...
1623
  	shm_init_task(p);
e4e55b47e   Tetsuo Handa   LSM: Revive secur...
1624
  	retval = security_task_alloc(p, clone_flags);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1625
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1626
  		goto bad_fork_cleanup_audit;
e4e55b47e   Tetsuo Handa   LSM: Revive secur...
1627
1628
1629
  	retval = copy_semundo(clone_flags, p);
  	if (retval)
  		goto bad_fork_cleanup_security;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1630
1631
  	retval = copy_files(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1632
  		goto bad_fork_cleanup_semundo;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1633
1634
  	retval = copy_fs(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1635
  		goto bad_fork_cleanup_files;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1636
1637
  	retval = copy_sighand(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1638
  		goto bad_fork_cleanup_fs;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1639
1640
  	retval = copy_signal(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1641
  		goto bad_fork_cleanup_sighand;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1642
1643
  	retval = copy_mm(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1644
  		goto bad_fork_cleanup_signal;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1645
1646
  	retval = copy_namespaces(clone_flags, p);
  	if (retval)
d84f4f992   David Howells   CRED: Inaugurate ...
1647
  		goto bad_fork_cleanup_mm;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1648
1649
  	retval = copy_io(clone_flags, p);
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1650
  		goto bad_fork_cleanup_namespaces;
3033f14ab   Josh Triplett   clone: support pa...
1651
  	retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1652
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1653
  		goto bad_fork_cleanup_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1654

425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1655
  	if (pid != &init_struct_pid) {
c2b1df2eb   Andy Lutomirski   Rename nsproxy.pi...
1656
  		pid = alloc_pid(p->nsproxy->pid_ns_for_children);
35f71bc0a   Michal Hocko   fork: report pid ...
1657
1658
  		if (IS_ERR(pid)) {
  			retval = PTR_ERR(pid);
0740aa5f6   Jiri Slaby   fork: free thread...
1659
  			goto bad_fork_cleanup_thread;
35f71bc0a   Michal Hocko   fork: report pid ...
1660
  		}
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1661
  	}
73c101011   Jens Axboe   block: initial pa...
1662
1663
1664
  #ifdef CONFIG_BLOCK
  	p->plug = NULL;
  #endif
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1665
  #ifdef CONFIG_FUTEX
8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1666
1667
1668
1669
  	p->robust_list = NULL;
  #ifdef CONFIG_COMPAT
  	p->compat_robust_list = NULL;
  #endif
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1670
1671
  	INIT_LIST_HEAD(&p->pi_state_list);
  	p->pi_state_cache = NULL;
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1672
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1673
  	/*
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1674
1675
1676
  	 * sigaltstack should be cleared when sharing the same VM
  	 */
  	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
2a7421383   Stas Sergeev   signals/sigaltsta...
1677
  		sas_ss_reset(p);
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1678
1679
  
  	/*
6580807da   Oleg Nesterov   ptrace: copy_proc...
1680
1681
  	 * Syscall tracing and stepping should be turned off in the
  	 * child regardless of CLONE_PTRACE.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1682
  	 */
6580807da   Oleg Nesterov   ptrace: copy_proc...
1683
  	user_disable_single_step(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1684
  	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
ed75e8d58   Laurent Vivier   [PATCH] UML Suppo...
1685
1686
1687
  #ifdef TIF_SYSCALL_EMU
  	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
  #endif
9745512ce   Arjan van de Ven   sched: latencytop...
1688
  	clear_all_latency_tracing(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1689

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1690
  	/* ok, now we should be set up.. */
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1691
1692
  	p->pid = pid_nr(pid);
  	if (clone_flags & CLONE_THREAD) {
5f8aadd8b   Oleg Nesterov   CLONE_PARENT shou...
1693
  		p->exit_signal = -1;
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
  		p->group_leader = current->group_leader;
  		p->tgid = current->tgid;
  	} else {
  		if (clone_flags & CLONE_PARENT)
  			p->exit_signal = current->group_leader->exit_signal;
  		else
  			p->exit_signal = (clone_flags & CSIGNAL);
  		p->group_leader = p;
  		p->tgid = p->pid;
  	}
5f8aadd8b   Oleg Nesterov   CLONE_PARENT shou...
1704

9d823e8f6   Wu Fengguang   writeback: per ta...
1705
1706
  	p->nr_dirtied = 0;
  	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
83712358b   Wu Fengguang   writeback: dirty ...
1707
  	p->dirty_paused_when = 0;
9d823e8f6   Wu Fengguang   writeback: per ta...
1708

bb8cbbfee   Oleg Nesterov   tasks/fork: Remov...
1709
  	p->pdeath_signal = 0;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1710
  	INIT_LIST_HEAD(&p->thread_group);
158e1645e   Al Viro   trim task_work: g...
1711
  	p->task_works = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1712

780de9dd2   Ingo Molnar   sched/headers, cg...
1713
  	cgroup_threadgroup_change_begin(current);
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1714
  	/*
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1715
1716
1717
1718
1719
  	 * Ensure that the cgroup subsystem policies allow the new process to be
  	 * forked. It should be noted that the new process's css_set can be changed
  	 * between here and cgroup_post_fork() if an organisation operation is in
  	 * progress.
  	 */
b53202e63   Oleg Nesterov   cgroup: kill cgrp...
1720
  	retval = cgroup_can_fork(p);
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1721
1722
1723
1724
  	if (retval)
  		goto bad_fork_free_pid;
  
  	/*
3f2e4e1d9   David Herrmann   fork: record star...
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
  	 * From this point on we must avoid any synchronous user-space
  	 * communication until we take the tasklist-lock. In particular, we do
  	 * not want user-space to be able to predict the process start-time by
  	 * stalling fork(2) after we recorded the start_time but before it is
  	 * visible to the system.
  	 */
  
  	p->start_time = ktime_get_ns();
  	p->real_start_time = ktime_get_boot_ns();
  
  	/*
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1736
1737
1738
  	 * Make it visible to the rest of the system, but don't wake it up yet.
  	 * Need tasklist lock for parent etc handling!
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1739
  	write_lock_irq(&tasklist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1740
  	/* CLONE_PARENT re-uses the old parent */
2d5516cbb   Oleg Nesterov   copy_process: fix...
1741
  	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1742
  		p->real_parent = current->real_parent;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1743
1744
  		p->parent_exec_id = current->parent_exec_id;
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1745
  		p->real_parent = current;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1746
1747
  		p->parent_exec_id = current->self_exec_id;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1748

d83a7cb37   Josh Poimboeuf   livepatch: change...
1749
  	klp_copy_process(p);
3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1750
  	spin_lock(&current->sighand->siglock);
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1751
1752
  
  	/*
dbd952127   Kees Cook   seccomp: introduc...
1753
1754
1755
1756
1757
1758
  	 * Copy seccomp details explicitly here, in case they were changed
  	 * before holding sighand lock.
  	 */
  	copy_seccomp(p);
  
  	/*
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1759
1760
1761
1762
1763
1764
  	 * Process group and session signals need to be delivered to just the
  	 * parent before the fork or both the parent and the child after the
  	 * fork. Restart if a signal comes in before we add the new process to
  	 * its process group.
  	 * A fatal signal pending means that current will exit, so the new
  	 * thread can't slip out of an OOM kill (or normal SIGKILL).
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1765
  	*/
23ff44402   Daniel Walker   whitespace fixes:...
1766
  	recalc_sigpending();
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1767
  	if (signal_pending(current)) {
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1768
  		retval = -ERESTARTNOINTR;
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1769
  		goto bad_fork_cancel_cgroup;
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1770
  	}
3fd372262   Kirill Tkhai   pid_ns: Fix race ...
1771
1772
1773
1774
  	if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
  		retval = -ENOMEM;
  		goto bad_fork_cancel_cgroup;
  	}
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1775

73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1776
  	if (likely(p->pid)) {
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1777
  		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1778

819077398   Oleg Nesterov   kernel/fork.c:cop...
1779
  		init_task_pid(p, PIDTYPE_PID, pid);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1780
  		if (thread_group_leader(p)) {
819077398   Oleg Nesterov   kernel/fork.c:cop...
1781
1782
  			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
  			init_task_pid(p, PIDTYPE_SID, task_session(current));
1c4042c29   Eric W. Biederman   pidns: Consolidat...
1783
  			if (is_child_reaper(pid)) {
17cf22c33   Eric W. Biederman   pidns: Use task_a...
1784
  				ns_of_pid(pid)->child_reaper = p;
1c4042c29   Eric W. Biederman   pidns: Consolidat...
1785
1786
  				p->signal->flags |= SIGNAL_UNKILLABLE;
  			}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1787

fea9d1755   Oleg Nesterov   ITIMER_REAL: conv...
1788
  			p->signal->leader_pid = pid;
9c9f4ded9   Alan Cox   tty: Add a kref c...
1789
  			p->signal->tty = tty_kref_get(current->signal->tty);
749860ce2   Pavel Tikhomirov   prctl: propagate ...
1790
1791
1792
1793
1794
1795
1796
  			/*
  			 * Inherit has_child_subreaper flag under the same
  			 * tasklist_lock with adding child to the process tree
  			 * for propagate_has_child_subreaper optimization.
  			 */
  			p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
  							 p->real_parent->signal->is_child_subreaper;
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1797
  			list_add_tail(&p->sibling, &p->real_parent->children);
5e85d4abe   Eric W. Biederman   [PATCH] task: Mak...
1798
  			list_add_tail_rcu(&p->tasks, &init_task.tasks);
819077398   Oleg Nesterov   kernel/fork.c:cop...
1799
1800
  			attach_pid(p, PIDTYPE_PGID);
  			attach_pid(p, PIDTYPE_SID);
909ea9646   Christoph Lameter   core: Replace __g...
1801
  			__this_cpu_inc(process_counts);
80628ca06   Oleg Nesterov   kernel/fork.c:cop...
1802
1803
1804
1805
  		} else {
  			current->signal->nr_threads++;
  			atomic_inc(&current->signal->live);
  			atomic_inc(&current->signal->sigcnt);
80628ca06   Oleg Nesterov   kernel/fork.c:cop...
1806
1807
  			list_add_tail_rcu(&p->thread_group,
  					  &p->group_leader->thread_group);
0c740d0af   Oleg Nesterov   introduce for_eac...
1808
1809
  			list_add_tail_rcu(&p->thread_node,
  					  &p->signal->thread_head);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1810
  		}
819077398   Oleg Nesterov   kernel/fork.c:cop...
1811
  		attach_pid(p, PIDTYPE_PID);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1812
  		nr_threads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1813
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1814
  	total_forks++;
3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1815
  	spin_unlock(&current->sighand->siglock);
4af4206be   Oleg Nesterov   tracing: Fix sysc...
1816
  	syscall_tracepoint_update(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1817
  	write_unlock_irq(&tasklist_lock);
4af4206be   Oleg Nesterov   tracing: Fix sysc...
1818

c13cf856c   Andrew Morton   [PATCH] fork.c: p...
1819
  	proc_fork_connector(p);
b53202e63   Oleg Nesterov   cgroup: kill cgrp...
1820
  	cgroup_post_fork(p);
780de9dd2   Ingo Molnar   sched/headers, cg...
1821
  	cgroup_threadgroup_change_end(current);
cdd6c482c   Ingo Molnar   perf: Do the big ...
1822
  	perf_event_fork(p);
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
1823
1824
  
  	trace_task_newtask(p, clone_flags);
3ab679661   Oleg Nesterov   uprobes: Teach up...
1825
  	uprobe_copy_process(p, clone_flags);
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
1826

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1827
  	return p;
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1828
  bad_fork_cancel_cgroup:
3fd372262   Kirill Tkhai   pid_ns: Fix race ...
1829
1830
  	spin_unlock(&current->sighand->siglock);
  	write_unlock_irq(&tasklist_lock);
b53202e63   Oleg Nesterov   cgroup: kill cgrp...
1831
  	cgroup_cancel_fork(p);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1832
  bad_fork_free_pid:
780de9dd2   Ingo Molnar   sched/headers, cg...
1833
  	cgroup_threadgroup_change_end(current);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1834
1835
  	if (pid != &init_struct_pid)
  		free_pid(pid);
0740aa5f6   Jiri Slaby   fork: free thread...
1836
1837
  bad_fork_cleanup_thread:
  	exit_thread(p);
fd0928df9   Jens Axboe   ioprio: move io p...
1838
  bad_fork_cleanup_io:
b69f22920   Louis Rilling   block: Fix io_con...
1839
1840
  	if (p->io_context)
  		exit_io_context(p);
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1841
  bad_fork_cleanup_namespaces:
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1842
  	exit_task_namespaces(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1843
  bad_fork_cleanup_mm:
c9f01245b   David Rientjes   oom: remove oom_d...
1844
  	if (p->mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1845
1846
  		mmput(p->mm);
  bad_fork_cleanup_signal:
4ab6c0833   Oleg Nesterov   clone(): fix race...
1847
  	if (!(clone_flags & CLONE_THREAD))
1c5354de9   Mike Galbraith   sched: Move sched...
1848
  		free_signal_struct(p->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1849
  bad_fork_cleanup_sighand:
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1850
  	__cleanup_sighand(p->sighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1851
1852
1853
1854
1855
1856
  bad_fork_cleanup_fs:
  	exit_fs(p); /* blocking */
  bad_fork_cleanup_files:
  	exit_files(p); /* blocking */
  bad_fork_cleanup_semundo:
  	exit_sem(p);
e4e55b47e   Tetsuo Handa   LSM: Revive secur...
1857
1858
  bad_fork_cleanup_security:
  	security_task_free(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1859
1860
  bad_fork_cleanup_audit:
  	audit_free(p);
6c72e3501   Peter Zijlstra   perf: fix perf bu...
1861
  bad_fork_cleanup_perf:
cdd6c482c   Ingo Molnar   perf: Do the big ...
1862
  	perf_event_free_task(p);
6c72e3501   Peter Zijlstra   perf: fix perf bu...
1863
  bad_fork_cleanup_policy:
b09be676e   Byungchul Park   locking/lockdep: ...
1864
  	lockdep_free_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1865
  #ifdef CONFIG_NUMA
f0be3d32b   Lee Schermerhorn   mempolicy: rename...
1866
  	mpol_put(p->mempolicy);
e8604cb43   Li Zefan   cgroup: fix spuri...
1867
  bad_fork_cleanup_threadgroup_lock:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1868
  #endif
35df17c57   Shailabh Nagar   [PATCH] task dela...
1869
  	delayacct_tsk_free(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1870
  bad_fork_cleanup_count:
d84f4f992   David Howells   CRED: Inaugurate ...
1871
  	atomic_dec(&p->cred->user->processes);
e0e817392   David Howells   CRED: Add some co...
1872
  	exit_creds(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1873
  bad_fork_free:
405c07597   Andy Lutomirski   fork: Add task st...
1874
  	p->state = TASK_DEAD;
68f24b08e   Andy Lutomirski   sched/core: Free ...
1875
  	put_task_stack(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1876
  	free_task(p);
fe7d37d1f   Oleg Nesterov   [PATCH] copy_proc...
1877
1878
  fork_out:
  	return ERR_PTR(retval);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1879
  }
f106eee10   Oleg Nesterov   pids: fix fork_id...
1880
1881
1882
1883
1884
1885
1886
1887
1888
  static inline void init_idle_pids(struct pid_link *links)
  {
  	enum pid_type type;
  
  	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
  		INIT_HLIST_NODE(&links[type].node); /* not really needed */
  		links[type].pid = &init_struct_pid;
  	}
  }
0db0628d9   Paul Gortmaker   kernel: delete __...
1889
  struct task_struct *fork_idle(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1890
  {
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1891
  	struct task_struct *task;
725fc629f   Andi Kleen   kernek/fork.c: al...
1892
1893
  	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
  			    cpu_to_node(cpu));
f106eee10   Oleg Nesterov   pids: fix fork_id...
1894
1895
  	if (!IS_ERR(task)) {
  		init_idle_pids(task->pids);
753ca4f31   Akinobu Mita   [PATCH] fix copy_...
1896
  		init_idle(task, cpu);
f106eee10   Oleg Nesterov   pids: fix fork_id...
1897
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1898

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1899
1900
  	return task;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1901
1902
1903
1904
1905
1906
  /*
   *  Ok, this is the main fork-routine.
   *
   * It copies the process, and if successful kick-starts
   * it and waits for it to finish using the VM if required.
   */
3033f14ab   Josh Triplett   clone: support pa...
1907
  long _do_fork(unsigned long clone_flags,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1908
  	      unsigned long stack_start,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1909
1910
  	      unsigned long stack_size,
  	      int __user *parent_tidptr,
3033f14ab   Josh Triplett   clone: support pa...
1911
1912
  	      int __user *child_tidptr,
  	      unsigned long tls)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1913
1914
1915
  {
  	struct task_struct *p;
  	int trace = 0;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1916
  	long nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1917

bdff746a3   Andrew Morton   clone: prepare to...
1918
  	/*
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1919
1920
1921
1922
  	 * Determine whether and which event to report to ptracer.  When
  	 * called from kernel_thread or CLONE_UNTRACED is explicitly
  	 * requested, no event is reported; otherwise, report if the event
  	 * for the type of forking is enabled.
09a05394f   Roland McGrath   tracehook: clone
1923
  	 */
e80d6661c   Al Viro   flagday: kill pt_...
1924
  	if (!(clone_flags & CLONE_UNTRACED)) {
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
  		if (clone_flags & CLONE_VFORK)
  			trace = PTRACE_EVENT_VFORK;
  		else if ((clone_flags & CSIGNAL) != SIGCHLD)
  			trace = PTRACE_EVENT_CLONE;
  		else
  			trace = PTRACE_EVENT_FORK;
  
  		if (likely(!ptrace_event_enabled(current, trace)))
  			trace = 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1935

62e791c1b   Al Viro   don't pass regs t...
1936
  	p = copy_process(clone_flags, stack_start, stack_size,
725fc629f   Andi Kleen   kernek/fork.c: al...
1937
  			 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
38addce8b   Emese Revfy   gcc-plugins: Add ...
1938
  	add_latent_entropy();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1939
1940
1941
1942
1943
1944
  	/*
  	 * Do this prior waking up the new thread - the thread pointer
  	 * might get invalid after that point, if the thread exits quickly.
  	 */
  	if (!IS_ERR(p)) {
  		struct completion vfork;
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1945
  		struct pid *pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1946

0a16b6075   Mathieu Desnoyers   tracing, sched: L...
1947
  		trace_sched_process_fork(current, p);
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1948
1949
  		pid = get_task_pid(p, PIDTYPE_PID);
  		nr = pid_vnr(pid);
30e49c263   Pavel Emelyanov   pid namespaces: a...
1950
1951
1952
  
  		if (clone_flags & CLONE_PARENT_SETTID)
  			put_user(nr, parent_tidptr);
a6f5e0637   Sukadev Bhattiprolu   pid namespaces: m...
1953

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1954
1955
1956
  		if (clone_flags & CLONE_VFORK) {
  			p->vfork_done = &vfork;
  			init_completion(&vfork);
d68b46fe1   Oleg Nesterov   vfork: make it ki...
1957
  			get_task_struct(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1958
  		}
3e51e3edf   Samir Bellabes   sched: Remove unu...
1959
  		wake_up_new_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1960

4b9d33e6d   Tejun Heo   ptrace: kill clon...
1961
1962
  		/* forking complete and child started to run, tell ptracer */
  		if (unlikely(trace))
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1963
  			ptrace_event_pid(trace, pid);
09a05394f   Roland McGrath   tracehook: clone
1964

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1965
  		if (clone_flags & CLONE_VFORK) {
d68b46fe1   Oleg Nesterov   vfork: make it ki...
1966
  			if (!wait_for_vfork_done(p, &vfork))
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1967
  				ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1968
  		}
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1969
1970
  
  		put_pid(pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1971
  	} else {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1972
  		nr = PTR_ERR(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1973
  	}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1974
  	return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1975
  }
3033f14ab   Josh Triplett   clone: support pa...
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
  #ifndef CONFIG_HAVE_COPY_THREAD_TLS
  /*
   * Compatibility entry point for architectures that call do_fork() directly
   * rather than using the syscall entry points below.  Every argument is
   * forwarded to _do_fork() and 0 is passed as the tls value.
   */
  long do_fork(unsigned long clone_flags,
  	     unsigned long stack_start,
  	     unsigned long stack_size,
  	     int __user *parent_tidptr,
  	     int __user *child_tidptr)
  {
  	const unsigned long no_tls = 0;

  	return _do_fork(clone_flags, stack_start, stack_size,
  			parent_tidptr, child_tidptr, no_tls);
  }
  #endif
2aa3a7f86   Al Viro   preparation for g...
1989
1990
1991
1992
1993
  /*
   * Create a kernel thread.
   */
  pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
  {
3033f14ab   Josh Triplett   clone: support pa...
1994
1995
  	return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
  		(unsigned long)arg, NULL, NULL, 0);
2aa3a7f86   Al Viro   preparation for g...
1996
  }
2aa3a7f86   Al Viro   preparation for g...
1997

d2125043a   Al Viro   generic sys_fork ...
1998
1999
2000
2001
  #ifdef __ARCH_WANT_SYS_FORK
  /*
   * fork(2): with CONFIG_MMU this is _do_fork() with SIGCHLD as the only
   * flag; without it the call fails with -EINVAL.
   */
  SYSCALL_DEFINE0(fork)
  {
  #ifndef CONFIG_MMU
  	/* can not support in nommu mode */
  	return -EINVAL;
  #else
  	return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
  #endif
  }
  #endif
  
  #ifdef __ARCH_WANT_SYS_VFORK
  /* vfork(2): _do_fork() with CLONE_VFORK, CLONE_VM and SIGCHLD. */
  SYSCALL_DEFINE0(vfork)
  {
  	const unsigned long vfork_flags = CLONE_VFORK | CLONE_VM | SIGCHLD;

  	return _do_fork(vfork_flags, 0, 0, NULL, NULL, 0);
  }
  #endif
  
  #ifdef __ARCH_WANT_SYS_CLONE
  /*
   * clone(2).  The order of the syscall arguments depends on the
   * configuration:
   *
   *   CONFIG_CLONE_BACKWARDS:  clone_flags, newsp, parent_tidptr, tls,
   *                            child_tidptr
   *   CONFIG_CLONE_BACKWARDS2: newsp, clone_flags, parent_tidptr,
   *                            child_tidptr, tls
   *   CONFIG_CLONE_BACKWARDS3: clone_flags, newsp, stack_size, parent_tidptr,
   *                            child_tidptr, tls
   *   otherwise:               clone_flags, newsp, parent_tidptr,
   *                            child_tidptr, tls
   *
   * Whatever the layout, the call is forwarded to _do_fork() with a stack
   * size of 0; the stack_size argument of the BACKWARDS3 layout is not used.
   */
  #if defined(CONFIG_CLONE_BACKWARDS)
  SYSCALL_DEFINE5(clone,
  		unsigned long, clone_flags,
  		unsigned long, newsp,
  		int __user *, parent_tidptr,
  		unsigned long, tls,
  		int __user *, child_tidptr)
  #elif defined(CONFIG_CLONE_BACKWARDS2)
  SYSCALL_DEFINE5(clone,
  		unsigned long, newsp,
  		unsigned long, clone_flags,
  		int __user *, parent_tidptr,
  		int __user *, child_tidptr,
  		unsigned long, tls)
  #elif defined(CONFIG_CLONE_BACKWARDS3)
  SYSCALL_DEFINE6(clone,
  		unsigned long, clone_flags,
  		unsigned long, newsp,
  		int, stack_size,
  		int __user *, parent_tidptr,
  		int __user *, child_tidptr,
  		unsigned long, tls)
  #else
  SYSCALL_DEFINE5(clone,
  		unsigned long, clone_flags,
  		unsigned long, newsp,
  		int __user *, parent_tidptr,
  		int __user *, child_tidptr,
  		unsigned long, tls)
  #endif
  {
  	return _do_fork(clone_flags, newsp, 0,
  			parent_tidptr, child_tidptr, tls);
  }
  #endif
0f1b92cbd   Oleg Nesterov   introduce the wal...
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
  /*
   * Walk the tree of processes below @top, calling @visitor on each child.
   *
   * @top:     task whose thread group is the root of the walk
   * @visitor: callback invoked as visitor(child, data).  A return value of 0
   *           moves on to the next sibling without looking at the child's own
   *           children, a positive value descends into the child, and a
   *           negative value ends the whole walk.
   * @data:    opaque pointer passed through to @visitor
   *
   * The whole walk runs with tasklist_lock held for reading.
   */
  void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data)
  {
  	struct task_struct *leader, *parent, *child;
  	int res;
  
  	read_lock(&tasklist_lock);
  	/* Always start from the group leader of @top. */
  	leader = top = top->group_leader;
  down:
  	/* Visit the children of every thread in leader's thread group. */
  	for_each_thread(leader, parent) {
  		list_for_each_entry(child, &parent->children, sibling) {
  			res = visitor(child, data);
  			if (res) {
  				/* Negative: abort the walk altogether. */
  				if (res < 0)
  					goto out;
  				/* Positive: restart the scan one level down. */
  				leader = child;
  				goto down;
  			}
  up:
  			/*
  			 * Re-entry point when climbing back up: child, parent
  			 * and leader were restored below, so both loops carry
  			 * on with the sibling that follows child.
  			 */
  			;
  		}
  	}
  
  	/*
  	 * This level is finished.  Unless we are back at the top, return to
  	 * the parent's level and resume right after the child we descended
  	 * into.
  	 */
  	if (leader != top) {
  		child = leader;
  		parent = child->real_parent;
  		leader = parent->group_leader;
  		goto up;
  	}
  out:
  	read_unlock(&tasklist_lock);
  }
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
2076
2077
2078
  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
  #define ARCH_MIN_MMSTRUCT_ALIGN 0
  #endif
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
2079
  /*
   * Constructor passed to kmem_cache_create() for "sighand_cache": initialises
   * the siglock spinlock and the signalfd wait queue head of the
   * sighand_struct at @data.
   */
  static void sighand_ctor(void *data)
aa1757f90   Oleg Nesterov   [PATCH] convert s...
2080
2081
  {
  	struct sighand_struct *sighand = data;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
2082
  	spin_lock_init(&sighand->siglock);
b8fceee17   Davide Libenzi   signalfd simplifi...
2083
  	init_waitqueue_head(&sighand->signalfd_wqh);
aa1757f90   Oleg Nesterov   [PATCH] convert s...
2084
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2085
2086
2087
2088
  /*
   * Create the slab caches used by this file: sighand, signal, files, fs,
   * mm_struct and vm_area_struct.  Every cache is created with SLAB_PANIC and
   * SLAB_ACCOUNT.  Once the caches exist, mmap_init() and
   * nsproxy_cache_init() are called.
   */
  void __init proc_caches_init(void)
  {
  	/* Only the sighand cache has a constructor and is RCU type-safe. */
  	sighand_cachep = kmem_cache_create("sighand_cache",
  					   sizeof(struct sighand_struct),
  					   0,
  					   SLAB_HWCACHE_ALIGN | SLAB_PANIC |
  					   SLAB_TYPESAFE_BY_RCU | SLAB_ACCOUNT,
  					   sighand_ctor);

  	signal_cachep = kmem_cache_create("signal_cache",
  					  sizeof(struct signal_struct),
  					  0,
  					  SLAB_HWCACHE_ALIGN | SLAB_PANIC |
  					  SLAB_ACCOUNT,
  					  NULL);

  	files_cachep = kmem_cache_create("files_cache",
  					 sizeof(struct files_struct),
  					 0,
  					 SLAB_HWCACHE_ALIGN | SLAB_PANIC |
  					 SLAB_ACCOUNT,
  					 NULL);

  	fs_cachep = kmem_cache_create("fs_cache",
  				      sizeof(struct fs_struct),
  				      0,
  				      SLAB_HWCACHE_ALIGN | SLAB_PANIC |
  				      SLAB_ACCOUNT,
  				      NULL);

  	/*
  	 * FIXME! "sizeof(struct mm_struct)" currently includes the whole
  	 * struct cpumask for the OFFSTACK case.  This could be changed to
  	 * allocate *only* as much of it as the maximum number of CPUs we can
  	 * ever have requires.  The cpumask_allocation sits at the end of the
  	 * structure for exactly that reason.
  	 */
  	mm_cachep = kmem_cache_create("mm_struct",
  				      sizeof(struct mm_struct),
  				      ARCH_MIN_MMSTRUCT_ALIGN,
  				      SLAB_HWCACHE_ALIGN | SLAB_PANIC |
  				      SLAB_ACCOUNT,
  				      NULL);

  	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);

  	mmap_init();
  	nsproxy_cache_init();
  }
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2118

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2119
  /*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2120
   * Check constraints on flags passed to the unshare system call.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2121
   */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2122
  static int check_unshare_flags(unsigned long unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2123
  {
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2124
2125
  	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
  				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
50804fe37   Eric W. Biederman   pidns: Support un...
2126
  				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
a79a908fd   Aditya Kali   cgroup: introduce...
2127
  				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2128
  		return -EINVAL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2129
  	/*
12c641ab8   Eric W. Biederman   unshare: Unsharin...
2130
2131
2132
2133
  	 * Not implemented, but pretend it works if there is nothing
  	 * to unshare.  Note that unsharing the address space or the
  	 * signal handlers also need to unshare the signal queues (aka
  	 * CLONE_THREAD).
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2134
  	 */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2135
  	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
12c641ab8   Eric W. Biederman   unshare: Unsharin...
2136
2137
2138
2139
2140
2141
2142
2143
2144
  		if (!thread_group_empty(current))
  			return -EINVAL;
  	}
  	if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
  		if (atomic_read(&current->sighand->count) > 1)
  			return -EINVAL;
  	}
  	if (unshare_flags & CLONE_VM) {
  		if (!current_is_single_threaded())
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2145
2146
  			return -EINVAL;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2147
2148
2149
2150
2151
  
  	return 0;
  }
  
  /*
99d1419d9   JANAK DESAI   [PATCH] unshare s...
2152
   * Unshare the filesystem structure if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2153
2154
2155
2156
   */
  static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  {
  	struct fs_struct *fs = current->fs;
498052bba   Al Viro   New locking/refco...
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
  	if (!(unshare_flags & CLONE_FS) || !fs)
  		return 0;
  
  	/* don't need lock here; in the worst case we'll do useless copy */
  	if (fs->users == 1)
  		return 0;
  
  	*new_fsp = copy_fs_struct(fs);
  	if (!*new_fsp)
  		return -ENOMEM;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2167
2168
2169
2170
2171
  
  	return 0;
  }
  
  /*
a016f3389   JANAK DESAI   [PATCH] unshare s...
2172
   * Unshare file descriptor table if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2173
2174
2175
2176
   */
  static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  {
  	struct files_struct *fd = current->files;
a016f3389   JANAK DESAI   [PATCH] unshare s...
2177
  	int error = 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2178
2179
  
  	if ((unshare_flags & CLONE_FILES) &&
a016f3389   JANAK DESAI   [PATCH] unshare s...
2180
2181
2182
2183
2184
  	    (fd && atomic_read(&fd->count) > 1)) {
  		*new_fdp = dup_fd(fd, &error);
  		if (!*new_fdp)
  			return error;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2185
2186
2187
2188
2189
  
  	return 0;
  }
  
  /*
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2190
2191
2192
2193
2194
2195
2196
   * unshare allows a process to 'unshare' part of the process
   * context which was originally shared using clone.  copy_*
   * functions used by do_fork() cannot be used here directly
   * because they modify an inactive task_struct that is being
   * constructed. Here we are modifying the current, active,
   * task_struct.
   */
6559eed8c   Heiko Carstens   [CVE-2009-0029] S...
2197
  SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2198
  {
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2199
  	struct fs_struct *fs, *new_fs = NULL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2200
  	struct files_struct *fd, *new_fd = NULL;
b2e0d9870   Eric W. Biederman   userns: Implement...
2201
  	struct cred *new_cred = NULL;
cf7b708c8   Pavel Emelyanov   Make access to ta...
2202
  	struct nsproxy *new_nsproxy = NULL;
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2203
  	int do_sysvsem = 0;
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2204
  	int err;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2205

50804fe37   Eric W. Biederman   pidns: Support un...
2206
  	/*
faf00da54   Eric W. Biederman   userns,pidns: For...
2207
2208
  	 * If unsharing a user namespace must also unshare the thread group
  	 * and unshare the filesystem root and working directories.
b2e0d9870   Eric W. Biederman   userns: Implement...
2209
2210
  	 */
  	if (unshare_flags & CLONE_NEWUSER)
e66eded83   Eric W. Biederman   userns: Don't all...
2211
  		unshare_flags |= CLONE_THREAD | CLONE_FS;
b2e0d9870   Eric W. Biederman   userns: Implement...
2212
  	/*
50804fe37   Eric W. Biederman   pidns: Support un...
2213
2214
2215
2216
  	 * If unsharing vm, must also unshare signal handlers.
  	 */
  	if (unshare_flags & CLONE_VM)
  		unshare_flags |= CLONE_SIGHAND;
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
2217
  	/*
12c641ab8   Eric W. Biederman   unshare: Unsharin...
2218
2219
2220
2221
2222
  	 * If unsharing a signal handlers, must also unshare the signal queues.
  	 */
  	if (unshare_flags & CLONE_SIGHAND)
  		unshare_flags |= CLONE_THREAD;
  	/*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2223
2224
2225
2226
  	 * If unsharing namespace, must also unshare filesystem information.
  	 */
  	if (unshare_flags & CLONE_NEWNS)
  		unshare_flags |= CLONE_FS;
50804fe37   Eric W. Biederman   pidns: Support un...
2227
2228
2229
2230
  
  	err = check_unshare_flags(unshare_flags);
  	if (err)
  		goto bad_unshare_out;
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2231
  	/*
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
2232
2233
2234
2235
2236
  	 * CLONE_NEWIPC must also detach from the undolist: after switching
  	 * to a new ipc namespace, the semaphore arrays from the old
  	 * namespace are unreachable.
  	 */
  	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2237
  		do_sysvsem = 1;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
2238
2239
  	err = unshare_fs(unshare_flags, &new_fs);
  	if (err)
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2240
  		goto bad_unshare_out;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
2241
2242
  	err = unshare_fd(unshare_flags, &new_fd);
  	if (err)
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2243
  		goto bad_unshare_cleanup_fs;
b2e0d9870   Eric W. Biederman   userns: Implement...
2244
  	err = unshare_userns(unshare_flags, &new_cred);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
2245
  	if (err)
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2246
  		goto bad_unshare_cleanup_fd;
b2e0d9870   Eric W. Biederman   userns: Implement...
2247
2248
2249
2250
  	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
  					 new_cred, new_fs);
  	if (err)
  		goto bad_unshare_cleanup_cred;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
2251

b2e0d9870   Eric W. Biederman   userns: Implement...
2252
  	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2253
2254
2255
2256
2257
2258
  		if (do_sysvsem) {
  			/*
  			 * CLONE_SYSVSEM is equivalent to sys_exit().
  			 */
  			exit_sem(current);
  		}
ab602f799   Jack Miller   shm: make exit_sh...
2259
2260
2261
2262
2263
  		if (unshare_flags & CLONE_NEWIPC) {
  			/* Orphan segments in old ns (see sem above). */
  			exit_shm(current);
  			shm_init_task(current);
  		}
ab516013a   Serge E. Hallyn   [PATCH] namespace...
2264

6f977e6b2   Alan Cox   fork: unshare: re...
2265
  		if (new_nsproxy)
cf7b708c8   Pavel Emelyanov   Make access to ta...
2266
  			switch_task_namespaces(current, new_nsproxy);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2267

cf7b708c8   Pavel Emelyanov   Make access to ta...
2268
  		task_lock(current);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2269
2270
  		if (new_fs) {
  			fs = current->fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
2271
  			spin_lock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2272
  			current->fs = new_fs;
498052bba   Al Viro   New locking/refco...
2273
2274
2275
2276
  			if (--fs->users)
  				new_fs = NULL;
  			else
  				new_fs = fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
2277
  			spin_unlock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2278
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2279
2280
2281
2282
2283
2284
2285
  		if (new_fd) {
  			fd = current->files;
  			current->files = new_fd;
  			new_fd = fd;
  		}
  
  		task_unlock(current);
b2e0d9870   Eric W. Biederman   userns: Implement...
2286
2287
2288
2289
2290
2291
  
  		if (new_cred) {
  			/* Install the new user namespace */
  			commit_creds(new_cred);
  			new_cred = NULL;
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2292
  	}
e42226732   Hari Bathini   perf: Add PERF_RE...
2293
  	perf_event_namespaces(current);
b2e0d9870   Eric W. Biederman   userns: Implement...
2294
2295
2296
  bad_unshare_cleanup_cred:
  	if (new_cred)
  		put_cred(new_cred);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2297
2298
2299
  bad_unshare_cleanup_fd:
  	if (new_fd)
  		put_files_struct(new_fd);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2300
2301
  bad_unshare_cleanup_fs:
  	if (new_fs)
498052bba   Al Viro   New locking/refco...
2302
  		free_fs_struct(new_fs);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2303

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2304
2305
2306
  bad_unshare_out:
  	return err;
  }
3b1253880   Al Viro   [PATCH] sanitize ...
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
  
  /*
   *	Helper to unshare the files of the current task.
   *	We don't want to expose copy_files internals to
   *	the exec layer of the kernel.
   */
  
  int unshare_files(struct files_struct **displaced)
  {
  	struct task_struct *task = current;
50704516f   Al Viro   Fix uninitialized...
2317
  	struct files_struct *copy = NULL;
3b1253880   Al Viro   [PATCH] sanitize ...
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
  	int error;
  
  	error = unshare_fd(CLONE_FILES, &copy);
  	if (error || !copy) {
  		*displaced = NULL;
  		return error;
  	}
  	*displaced = task->files;
  	task_lock(task);
  	task->files = copy;
  	task_unlock(task);
  	return 0;
  }
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
  
  /*
   * sysctl handler for the maximum number of threads.
   *
   * A copy of @table is pointed at a local integer holding the current
   * max_threads value, bounded by MIN_THREADS and MAX_THREADS, and handed to
   * proc_dointvec_minmax().  On a successful write the new value is applied
   * with set_max_threads().  Returns the error from proc_dointvec_minmax(),
   * or 0.
   */
  int sysctl_max_threads(struct ctl_table *table, int write,
  		       void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int value = max_threads;
  	int lower = MIN_THREADS;
  	int upper = MAX_THREADS;
  	struct ctl_table shadow = *table;
  	int ret;

  	shadow.data = &value;
  	shadow.extra1 = &lower;
  	shadow.extra2 = &upper;

  	ret = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
  	if (ret)
  		return ret;

  	if (write)
  		set_max_threads(value);

  	return 0;
  }