Blame view

kernel/fork.c 55.8 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/kernel/fork.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   *  'fork.c' contains the help-routines for the 'fork' system call
   * (see also entry.S and others).
   * Fork is rather simple, once you get the hang of it, but the memory
   * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/unistd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
18
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/completion.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
  #include <linux/personality.h>
  #include <linux/mempolicy.h>
  #include <linux/sem.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
23
  #include <linux/fdtable.h>
da9cbc873   Jens Axboe   block: blkdev.h c...
24
  #include <linux/iocontext.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/key.h>
  #include <linux/binfmts.h>
  #include <linux/mman.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
28
  #include <linux/mmu_notifier.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  #include <linux/fs.h>
615d6e875   Davidlohr Bueso   mm: per-thread vm...
30
31
  #include <linux/mm.h>
  #include <linux/vmacache.h>
ab516013a   Serge E. Hallyn   [PATCH] namespace...
32
  #include <linux/nsproxy.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
33
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
  #include <linux/cpu.h>
b4f48b636   Paul Menage   Task Control Grou...
35
  #include <linux/cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
  #include <linux/security.h>
a1e78772d   Mel Gorman   hugetlb: reserve ...
37
  #include <linux/hugetlb.h>
e2cfabdfd   Will Drewry   seccomp: add syst...
38
  #include <linux/seccomp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
39
40
41
42
  #include <linux/swap.h>
  #include <linux/syscalls.h>
  #include <linux/jiffies.h>
  #include <linux/futex.h>
8141c7f3e   Linus Torvalds   Move "exit_robust...
43
  #include <linux/compat.h>
207205a2b   Eric Dumazet   kthread: NUMA awa...
44
  #include <linux/kthread.h>
7c3ab7381   Andrew Morton   [PATCH] io-accoun...
45
  #include <linux/task_io_accounting_ops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
46
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
47
48
49
  #include <linux/ptrace.h>
  #include <linux/mount.h>
  #include <linux/audit.h>
78fb74669   Pavel Emelianov   Memory controller...
50
  #include <linux/memcontrol.h>
f201ae235   Frederic Weisbecker   tracing/function-...
51
  #include <linux/ftrace.h>
5e2bf0142   Mike Galbraith   namespaces, pid_n...
52
  #include <linux/proc_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
  #include <linux/profile.h>
  #include <linux/rmap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
55
  #include <linux/ksm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56
  #include <linux/acct.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
57
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
58
  #include <linux/cn_proc.h>
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
59
  #include <linux/freezer.h>
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
60
  #include <linux/delayacct.h>
ad4ecbcba   Shailabh Nagar   [PATCH] delay acc...
61
  #include <linux/taskstats_kern.h>
0a4254058   Arjan van de Ven   [PATCH] Add the c...
62
  #include <linux/random.h>
522ed7767   Miloslav Trmac   Audit: add TTY in...
63
  #include <linux/tty.h>
fd0928df9   Jens Axboe   ioprio: move io p...
64
  #include <linux/blkdev.h>
5ad4e53bd   Al Viro   Get rid of indire...
65
  #include <linux/fs_struct.h>
7c9f8861e   Eric Sandeen   stackprotector: u...
66
  #include <linux/magic.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
67
  #include <linux/perf_event.h>
42c4ab41a   Stanislaw Gruszka   itimers: Merge IT...
68
  #include <linux/posix-timers.h>
8e7cac798   Avi Kivity   core: Fix user re...
69
  #include <linux/user-return-notifier.h>
3d5992d2a   Ying Han   oom: add per-mm o...
70
  #include <linux/oom.h>
ba76149f4   Andrea Arcangeli   thp: khugepaged
71
  #include <linux/khugepaged.h>
d80e731ec   Oleg Nesterov   epoll: introduce ...
72
  #include <linux/signalfd.h>
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
73
  #include <linux/uprobes.h>
a27bb332c   Kent Overstreet   aio: don't includ...
74
  #include <linux/aio.h>
52f5684c8   Gideon Israel Dsouza   kernel: use macro...
75
  #include <linux/compiler.h>
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
76
  #include <linux/sysctl.h>
5c9a8750a   Dmitry Vyukov   kernel: add kcov ...
77
  #include <linux/kcov.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
78
79
80
81
82
83
84
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
ad8d75fff   Steven Rostedt   tracing/events: m...
85
  #include <trace/events/sched.h>
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
86
87
  #define CREATE_TRACE_POINTS
  #include <trace/events/task.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88
  /*
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
89
90
91
92
93
94
95
96
97
98
   * Minimum number of threads to boot the kernel
   */
  #define MIN_THREADS 20
  
  /*
   * Maximum number of threads
   */
  #define MAX_THREADS FUTEX_TID_MASK
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99
100
101
   * Counters protected by write_lock_irq(&tasklist_lock)
   */
  unsigned long total_forks;	/* Handle normal Linux uptimes. */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
102
  int nr_threads;			/* The idle threads do not count.. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
105
106
  
  int max_threads;		/* tunable limit on nr_threads */
  
  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
c59923a15   Christoph Hellwig   [PATCH] remove th...
107
  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
108
109
110
111
112
113
114
115
  
  #ifdef CONFIG_PROVE_RCU
  /*
   * Report whether lockdep currently considers tasklist_lock to be held.
   * The value of lockdep_is_held() is passed back unchanged.
   */
  int lockdep_tasklist_lock_is_held(void)
  {
  	int held = lockdep_is_held(&tasklist_lock);
  
  	return held;
  }
  EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
  #endif /* #ifdef CONFIG_PROVE_RCU */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
116
117
118
119
120
  
  /*
   * nr_processes - sum the per-CPU process_counts counters.
   *
   * Walks every possible CPU and adds its process_counts value to a running
   * total, which is returned.  Note that the per-CPU counters are declared
   * unsigned long while the total and the return value are int.
   */
  int nr_processes(void)
  {
  	int cpu;
  	int total = 0;
1d5107509   Ian Campbell   Correct nr_proces...
121
  	for_each_possible_cpu(cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
123
124
125
  		total += per_cpu(process_counts, cpu);
  
  	return total;
  }
f19b9f74b   Akinobu Mita   fork: fix error h...
126
127
128
  /*
   * Default, empty implementation of the arch hook that free_task() calls
   * on @tsk right before free_task_struct().  It is marked __weak, so a
   * non-weak definition elsewhere takes precedence over this one.
   */
  void __weak arch_release_task_struct(struct task_struct *tsk)
  {
  }
f5e102873   Thomas Gleixner   task_allocator: U...
129
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
130
  static struct kmem_cache *task_struct_cachep;
41101809a   Thomas Gleixner   fork: Provide wea...
131
132
133
134
135
  
  /*
   * Allocate one task_struct from task_struct_cachep on NUMA node @node with
   * GFP_KERNEL.  The result of kmem_cache_alloc_node() is returned as is.
   */
  static inline struct task_struct *alloc_task_struct_node(int node)
  {
  	struct task_struct *new_tsk;
  
  	new_tsk = kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
  	return new_tsk;
  }
41101809a   Thomas Gleixner   fork: Provide wea...
136
137
  /* Give @tsk back to the task_struct slab cache (task_struct_cachep). */
  static inline void free_task_struct(struct task_struct *tsk)
  {
41101809a   Thomas Gleixner   fork: Provide wea...
138
139
  	kmem_cache_free(task_struct_cachep, tsk);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
  #endif
b235beea9   Linus Torvalds   Clarify naming of...
141
  /*
   * Default, empty implementation of the arch hook that release_task_stack()
   * calls with tsk->stack before the stack is handed to free_thread_stack().
   * It is marked __weak, so a non-weak definition elsewhere replaces it.
   */
  void __weak arch_release_thread_stack(unsigned long *stack)
f19b9f74b   Akinobu Mita   fork: fix error h...
142
143
  {
  }
b235beea9   Linus Torvalds   Clarify naming of...
144
  #ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
41101809a   Thomas Gleixner   fork: Provide wea...
145

0d15d74a1   Thomas Gleixner   fork: Provide kme...
146
147
148
149
  /*
   * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
   * kmemcache based allocator.
   */
ba14a194a   Andy Lutomirski   fork: Add generic...
150
  # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
ac496bf48   Andy Lutomirski   fork: Optimize ta...
151
152
153
154
155
156
157
158
159
  
  #ifdef CONFIG_VMAP_STACK
  /*
   * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
   * flush.  Try to minimize the number of calls by caching stacks.
   */
  #define NR_CACHED_STACKS 2
  static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
  #endif
ba14a194a   Andy Lutomirski   fork: Add generic...
160
  /*
   * alloc_thread_stack_node - allocate a kernel stack for a new task.
   * @tsk: task the stack is for; with CONFIG_VMAP_STACK its stack_vm_area
   *	 field is filled in here
   * @node: NUMA node passed to the underlying allocator
   *
   * With CONFIG_VMAP_STACK:
   *  - first scan this CPU's cached_stacks[] with interrupts disabled; the
   *    first non-NULL entry is removed from the cache, recorded in
   *    tsk->stack_vm_area and its address returned;
   *  - otherwise allocate THREAD_SIZE bytes aligned to THREAD_SIZE with
   *    __vmalloc_node_range() and remember the matching vm_struct.
   * Without CONFIG_VMAP_STACK, allocate THREAD_SIZE_ORDER pages with
   * alloc_pages_node() and return their page_address().
   *
   * Returns the stack address, or NULL if the allocation failed.
   */
  static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
b69c49b78   FUJITA Tomonori   clean up duplicat...
161
  {
ba14a194a   Andy Lutomirski   fork: Add generic...
162
  #ifdef CONFIG_VMAP_STACK
ac496bf48   Andy Lutomirski   fork: Optimize ta...
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
  	void *stack;
  	int i;
  
  	/* Try the per-CPU cache of previously freed stacks first. */
  	local_irq_disable();
  	for (i = 0; i < NR_CACHED_STACKS; i++) {
  		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
  
  		if (!s)
  			continue;
  		/* Take ownership: empty the slot before handing it out. */
  		this_cpu_write(cached_stacks[i], NULL);
  
  		tsk->stack_vm_area = s;
  		local_irq_enable();
  		return s->addr;
  	}
  	local_irq_enable();
  
  	/* Cache miss: get a fresh THREAD_SIZE-aligned vmalloc area. */
  	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
  				     VMALLOC_START, VMALLOC_END,
  				     THREADINFO_GFP | __GFP_HIGHMEM,
  				     PAGE_KERNEL,
  				     0, node, __builtin_return_address(0));
ba14a194a   Andy Lutomirski   fork: Add generic...
185
186
187
188
189
190
191
192
193
194
  
  	/*
  	 * We can't call find_vm_area() in interrupt context, and
  	 * free_thread_stack() can be called in interrupt context,
  	 * so cache the vm_struct.
  	 */
  	if (stack)
  		tsk->stack_vm_area = find_vm_area(stack);
  	return stack;
  #else
4949148ad   Vladimir Davydov   mm: charge/unchar...
195
196
  	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
  					     THREAD_SIZE_ORDER);
b6a84016b   Eric Dumazet   mm: NUMA aware al...
197
198
  
  	return page ? page_address(page) : NULL;
ba14a194a   Andy Lutomirski   fork: Add generic...
199
  #endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
200
  }
ba14a194a   Andy Lutomirski   fork: Add generic...
201
  /*
   * free_thread_stack - release the kernel stack of @tsk.
   *
   * With CONFIG_VMAP_STACK and a vm area recorded for the task, the
   * vm_struct is parked in the first empty slot of this CPU's
   * cached_stacks[] (interrupt state saved and restored around the scan).
   * If every slot is occupied the stack is vfree()d instead.
   *
   * In all other cases the stack is returned to the page allocator as
   * THREAD_SIZE_ORDER pages.
   */
  static inline void free_thread_stack(struct task_struct *tsk)
b69c49b78   FUJITA Tomonori   clean up duplicat...
202
  {
ac496bf48   Andy Lutomirski   fork: Optimize ta...
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  #ifdef CONFIG_VMAP_STACK
  	if (task_stack_vm_area(tsk)) {
  		unsigned long flags;
  		int i;
  
  		local_irq_save(flags);
  		for (i = 0; i < NR_CACHED_STACKS; i++) {
  			/* Slot already holds a cached stack; try the next. */
  			if (this_cpu_read(cached_stacks[i]))
  				continue;
  
  			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
  			local_irq_restore(flags);
  			return;
  		}
  		local_irq_restore(flags);
ba14a194a   Andy Lutomirski   fork: Add generic...
218
  		vfree(tsk->stack);
ac496bf48   Andy Lutomirski   fork: Optimize ta...
219
220
221
222
223
  		return;
  	}
  #endif
  
  	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
b69c49b78   FUJITA Tomonori   clean up duplicat...
224
  }
0d15d74a1   Thomas Gleixner   fork: Provide kme...
225
  # else
b235beea9   Linus Torvalds   Clarify naming of...
226
  static struct kmem_cache *thread_stack_cache;
0d15d74a1   Thomas Gleixner   fork: Provide kme...
227

9521d3997   Michael Ellerman   Fix build break i...
228
  /*
   * Slab-backed variant of alloc_thread_stack_node(): take one object from
   * thread_stack_cache on NUMA node @node using THREADINFO_GFP.  @tsk is not
   * used by this variant.  Returns what kmem_cache_alloc_node() returns.
   */
  static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
0d15d74a1   Thomas Gleixner   fork: Provide kme...
229
230
  						  int node)
  {
b235beea9   Linus Torvalds   Clarify naming of...
231
  	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
0d15d74a1   Thomas Gleixner   fork: Provide kme...
232
  }
ba14a194a   Andy Lutomirski   fork: Add generic...
233
  /*
   * Slab-backed variant of free_thread_stack(): return tsk->stack to
   * thread_stack_cache.
   */
  static void free_thread_stack(struct task_struct *tsk)
0d15d74a1   Thomas Gleixner   fork: Provide kme...
234
  {
ba14a194a   Andy Lutomirski   fork: Add generic...
235
  	kmem_cache_free(thread_stack_cache, tsk->stack);
0d15d74a1   Thomas Gleixner   fork: Provide kme...
236
  }
b235beea9   Linus Torvalds   Clarify naming of...
237
  /*
   * Create the "thread_stack" slab cache used by the slab-backed stack
   * allocator.  Both the object size and the alignment are THREAD_SIZE.
   * Failure to create the cache is treated as fatal (BUG_ON).
   */
  void thread_stack_cache_init(void)
0d15d74a1   Thomas Gleixner   fork: Provide kme...
238
  {
b235beea9   Linus Torvalds   Clarify naming of...
239
  	thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
0d15d74a1   Thomas Gleixner   fork: Provide kme...
240
  					      THREAD_SIZE, 0, NULL);
b235beea9   Linus Torvalds   Clarify naming of...
241
  	BUG_ON(thread_stack_cache == NULL);
0d15d74a1   Thomas Gleixner   fork: Provide kme...
242
243
  }
  # endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
244
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
245
  /* SLAB cache for signal_struct structures (tsk->signal) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
246
  static struct kmem_cache *signal_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
247
248
  
  /* SLAB cache for sighand_struct structures (tsk->sighand) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
249
  struct kmem_cache *sighand_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
  
  /* SLAB cache for files_struct structures (tsk->files) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
252
  struct kmem_cache *files_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
253
254
  
  /* SLAB cache for fs_struct structures (tsk->fs) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
255
  struct kmem_cache *fs_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256
257
  
  /* SLAB cache for vm_area_struct structures */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
258
  struct kmem_cache *vm_area_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259
260
  
  /* SLAB cache for mm_struct structures (tsk->mm) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
261
  static struct kmem_cache *mm_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262

ba14a194a   Andy Lutomirski   fork: Add generic...
263
  /*
   * account_kernel_stack - adjust kernel-stack statistics for a task's stack.
   * @tsk: task whose stack is being accounted
   * @account: multiplier applied to the stack size; the callers in this file
   *	     pass 1 (dup_task_struct) and -1 (release_task_stack)
   *
   * Updates the NR_KERNEL_STACK_KB zone counter and the
   * MEMCG_KERNEL_STACK_KB memcg counter, both in units of KB.  When the task
   * has a vm area for its stack, the zone counter is updated once per stack
   * page using that page's own zone; otherwise a single update for the whole
   * stack is made against the zone of the first page.
   */
  static void account_kernel_stack(struct task_struct *tsk, int account)
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
264
  {
ba14a194a   Andy Lutomirski   fork: Add generic...
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
  	void *stack = task_stack_page(tsk);
  	struct vm_struct *vm = task_stack_vm_area(tsk);
  
  	/* The per-page update below divides PAGE_SIZE by 1024. */
  	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
  
  	if (vm) {
  		int i;
  
  		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
  
  		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
  			mod_zone_page_state(page_zone(vm->pages[i]),
  					    NR_KERNEL_STACK_KB,
  					    PAGE_SIZE / 1024 * account);
  		}
  
  		/* All stack pages belong to the same memcg. */
  		memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
  					    account * (THREAD_SIZE / 1024));
  	} else {
  		/*
  		 * All stack pages are in the same zone and belong to the
  		 * same memcg.
  		 */
  		struct page *first_page = virt_to_page(stack);
  
  		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
  				    THREAD_SIZE / 1024 * account);
  
  		memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
  					    account * (THREAD_SIZE / 1024));
  	}
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
297
  }
68f24b08e   Andy Lutomirski   sched/core: Free ...
298
  /*
   * release_task_stack - un-account and free the kernel stack of @tsk.
   *
   * If the task is not in TASK_DEAD state this warns and returns without
   * freeing anything.  Otherwise the stack accounting is reversed, the arch
   * hook arch_release_thread_stack() is called, the stack is freed, and
   * tsk->stack (plus tsk->stack_vm_area under CONFIG_VMAP_STACK) is cleared.
   */
  static void release_task_stack(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299
  {
405c07597   Andy Lutomirski   fork: Add task st...
300
301
  	if (WARN_ON(tsk->state != TASK_DEAD))
  		return;  /* Better to leak the stack than to free prematurely */
ba14a194a   Andy Lutomirski   fork: Add generic...
302
  	account_kernel_stack(tsk, -1);
b235beea9   Linus Torvalds   Clarify naming of...
303
  	arch_release_thread_stack(tsk->stack);
ba14a194a   Andy Lutomirski   fork: Add generic...
304
  	free_thread_stack(tsk);
68f24b08e   Andy Lutomirski   sched/core: Free ...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
  	/* Drop the now-stale pointers to the freed stack. */
  	tsk->stack = NULL;
  #ifdef CONFIG_VMAP_STACK
  	tsk->stack_vm_area = NULL;
  #endif
  }
  
  #ifdef CONFIG_THREAD_INFO_IN_TASK
  /*
   * Drop one reference on the stack of @tsk.  The stack is released only by
   * the caller whose decrement brings stack_refcount down to zero.
   */
  void put_task_stack(struct task_struct *tsk)
  {
  	if (!atomic_dec_and_test(&tsk->stack_refcount))
  		return;
  
  	release_task_stack(tsk);
  }
  #endif
  
  /*
   * free_task - final teardown of a task_struct.
   *
   * Without CONFIG_THREAD_INFO_IN_TASK the stack is released here; with it,
   * the stack is expected to be gone already and a non-zero stack_refcount
   * triggers a one-time warning.  Afterwards the rt-mutex debug, ftrace
   * graph, seccomp filter and arch hooks are run, and the task_struct itself
   * is freed.
   */
  void free_task(struct task_struct *tsk)
  {
  #ifndef CONFIG_THREAD_INFO_IN_TASK
  	/*
  	 * The task is finally done with both the stack and thread_info,
  	 * so free both.
  	 */
  	release_task_stack(tsk);
  #else
  	/*
  	 * If the task had a separate stack allocation, it should be gone
  	 * by now.
  	 */
  	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
  #endif
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
334
  	rt_mutex_debug_task_free(tsk);
fb52607af   Frederic Weisbecker   tracing/function-...
335
  	ftrace_graph_exit_task(tsk);
e2cfabdfd   Will Drewry   seccomp: add syst...
336
  	put_seccomp_filter(tsk);
f19b9f74b   Akinobu Mita   fork: fix error h...
337
  	arch_release_task_struct(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
338
339
340
  	free_task_struct(tsk);
  }
  EXPORT_SYMBOL(free_task);
ea6d290ca   Oleg Nesterov   signals: make tas...
341
342
  /*
   * free_signal_struct - tear down and free a signal_struct.
   *
   * Runs the taskstats and sched-autogroup teardown for @sig, drops the
   * reference on sig->oom_mm (if set) asynchronously, and returns @sig to
   * signal_cachep.
   */
  static inline void free_signal_struct(struct signal_struct *sig)
  {
97101eb41   Oleg Nesterov   exit: move taskst...
343
  	taskstats_tgid_free(sig);
1c5354de9   Mike Galbraith   sched: Move sched...
344
  	sched_autogroup_exit(sig);
7283094ec   Michal Hocko   kernel, oom: fix ...
345
346
347
348
  	/*
  	 * __mmdrop is not safe to call from softirq context on x86 due to
  	 * pgd_dtor so postpone it to the async context
  	 */
26db62f17   Michal Hocko   oom: keep mm of t...
349
  	if (sig->oom_mm)
7283094ec   Michal Hocko   kernel, oom: fix ...
350
  		mmdrop_async(sig->oom_mm);
ea6d290ca   Oleg Nesterov   signals: make tas...
351
352
353
354
355
  	kmem_cache_free(signal_cachep, sig);
  }
  
  /*
   * Drop one reference on @sig (sig->sigcnt) and free the structure when the
   * count reaches zero.
   */
  static inline void put_signal_struct(struct signal_struct *sig)
  {
1c5354de9   Mike Galbraith   sched: Move sched...
356
  	if (atomic_dec_and_test(&sig->sigcnt))
ea6d290ca   Oleg Nesterov   signals: make tas...
357
358
  		free_signal_struct(sig);
  }
158d9ebd1   Andrew Morton   [PATCH] resurrect...
359
  /*
   * __put_task_struct - release everything still attached to a task_struct.
   *
   * Warns if the task has no exit_state, still has a non-zero usage count,
   * or is the current task.  It then runs the cgroup, NUMA, security,
   * credentials and delay-accounting teardown and drops the task's reference
   * on its signal_struct.  Finally free_task() is called, unless
   * profile_handoff_task() returns non-zero.
   */
  void __put_task_struct(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
360
  {
270f722d4   Eugene Teo   Fix tsk->exit_sta...
361
  	WARN_ON(!tsk->exit_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
362
363
  	WARN_ON(atomic_read(&tsk->usage));
  	WARN_ON(tsk == current);
2e91fa7f6   Tejun Heo   cgroup: keep zomb...
364
  	cgroup_free(tsk);
156654f49   Mike Galbraith   sched/numa: Move ...
365
  	task_numa_free(tsk);
1a2a4d06e   Kees Cook   security: create ...
366
  	security_task_free(tsk);
e0e817392   David Howells   CRED: Add some co...
367
  	exit_creds(tsk);
35df17c57   Shailabh Nagar   [PATCH] task dela...
368
  	delayacct_tsk_free(tsk);
ea6d290ca   Oleg Nesterov   signals: make tas...
369
  	put_signal_struct(tsk->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
371
372
373
  
  	if (!profile_handoff_task(tsk))
  		free_task(tsk);
  }
77c100c83   Rik van Riel   export pid symbol...
374
  EXPORT_SYMBOL_GPL(__put_task_struct);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
375

6c0a9fa62   Thomas Gleixner   fork: Remove the ...
376
  /*
   * Default, empty arch hook invoked from fork_init().  Marked __weak so a
   * non-weak definition elsewhere takes precedence.
   */
  void __init __weak arch_task_cache_init(void)
  {
  }
61c4628b5   Suresh Siddha   x86, fpu: split F...
377

ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
378
379
380
  /*
   * set_max_threads - compute and store the global max_threads limit.
   * @max_threads_suggested: upper bound requested by the caller
   *
   * Starts from a memory-derived limit (total RAM in bytes divided by
   * eight times THREAD_SIZE), caps it at @max_threads_suggested and finally
   * clamps the result to the range [MIN_THREADS, MAX_THREADS].
   */
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
381
  static void set_max_threads(unsigned int max_threads_suggested)
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
382
  {
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
383
  	u64 threads;
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
384
385
  
  	/*
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
386
387
  	 * The number of threads shall be limited such that the thread
  	 * structures may only consume a small part of the available memory.
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
388
  	 */
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
389
390
391
392
393
  	/*
  	 * If the bit widths of the two factors add up to more than 64 the
  	 * product below could overflow a u64, so fall back to MAX_THREADS.
  	 */
  	if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
  		threads = MAX_THREADS;
  	else
  		threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
  				    (u64) THREAD_SIZE * 8UL);
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
394
395
  	if (threads > max_threads_suggested)
  		threads = max_threads_suggested;
ac1b398de   Heinrich Schuchardt   kernel/fork.c: av...
396
  	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
397
  }
5aaeb5c01   Ingo Molnar   x86/fpu, sched: I...
398
399
400
401
  #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
  /* Initialized by the architecture: */
  int arch_task_struct_size __read_mostly;
  #endif
0c8c0f03e   Dave Hansen   x86/fpu, sched: D...
402

ff691f6e0   Heinrich Schuchardt   kernel/fork.c: ne...
403
  /*
   * fork_init - boot-time initialisation of the fork machinery.
   *
   * Creates the task_struct slab cache (unless the architecture supplies
   * its own task_struct allocator), runs the arch task-cache hook and
   * computes max_threads.  Half of max_threads then becomes init_task's
   * RLIMIT_NPROC (soft and hard), the same limit is copied to
   * RLIMIT_SIGPENDING, and every init_user_ns.ucount_max[] entry is set to
   * that value as well.
   */
  void __init fork_init(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404
  {
25f9c0817   Eric W. Biederman   userns: Generaliz...
405
  	int i;
f5e102873   Thomas Gleixner   task_allocator: U...
406
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407
408
409
410
  #ifndef ARCH_MIN_TASKALIGN
  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
  #endif
  	/* create a slab on which task_structs can be allocated */
5d097056c   Vladimir Davydov   kmemcg: account c...
411
412
413
  	task_struct_cachep = kmem_cache_create("task_struct",
  			arch_task_struct_size, ARCH_MIN_TASKALIGN,
  			SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
414
  #endif
61c4628b5   Suresh Siddha   x86, fpu: split F...
415
416
  	/* do the arch specific task caches init */
  	arch_task_cache_init();
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
417
  	set_max_threads(MAX_THREADS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418
419
420
421
422
  
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
  		init_task.signal->rlim[RLIMIT_NPROC];
b376c3e1b   Eric W. Biederman   userns: Add a lim...
423

25f9c0817   Eric W. Biederman   userns: Generaliz...
424
425
426
  	for (i = 0; i < UCOUNT_COUNTS; i++) {
  		init_user_ns.ucount_max[i] = max_threads/2;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
427
  }
52f5684c8   Gideon Israel Dsouza   kernel: use macro...
428
  /*
   * Default implementation of the arch hook used by dup_task_struct(): copy
   * the whole task_struct from @src to @dst by structure assignment and
   * return 0.  Marked __weak so a non-weak definition elsewhere replaces it.
   */
  int __weak arch_dup_task_struct(struct task_struct *dst,
61c4628b5   Suresh Siddha   x86, fpu: split F...
429
430
431
432
433
  					       struct task_struct *src)
  {
  	*dst = *src;
  	return 0;
  }
d4311ff1a   Aaron Tomlin   init/main.c: Give...
434
435
436
437
438
439
440
  void set_task_stack_end_magic(struct task_struct *tsk)
  {
  	unsigned long *stackend;
  
  	stackend = end_of_stack(tsk);
  	*stackend = STACK_END_MAGIC;	/* for overflow detection */
  }
725fc629f   Andi Kleen   kernek/fork.c: al...
441
  static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
443
  {
  	struct task_struct *tsk;
b235beea9   Linus Torvalds   Clarify naming of...
444
  	unsigned long *stack;
ba14a194a   Andy Lutomirski   fork: Add generic...
445
  	struct vm_struct *stack_vm_area;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
446
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
447

725fc629f   Andi Kleen   kernek/fork.c: al...
448
449
  	if (node == NUMA_NO_NODE)
  		node = tsk_fork_get_node(orig);
504f52b54   Eric Dumazet   mm: NUMA aware al...
450
  	tsk = alloc_task_struct_node(node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
451
452
  	if (!tsk)
  		return NULL;
b235beea9   Linus Torvalds   Clarify naming of...
453
454
  	stack = alloc_thread_stack_node(tsk, node);
  	if (!stack)
f19b9f74b   Akinobu Mita   fork: fix error h...
455
  		goto free_tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
456

ba14a194a   Andy Lutomirski   fork: Add generic...
457
  	stack_vm_area = task_stack_vm_area(tsk);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
458
  	err = arch_dup_task_struct(tsk, orig);
ba14a194a   Andy Lutomirski   fork: Add generic...
459
460
461
462
463
464
465
466
467
468
  
  	/*
  	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
  	 * sure they're properly initialized before using any stack-related
  	 * functions again.
  	 */
  	tsk->stack = stack;
  #ifdef CONFIG_VMAP_STACK
  	tsk->stack_vm_area = stack_vm_area;
  #endif
68f24b08e   Andy Lutomirski   sched/core: Free ...
469
470
471
  #ifdef CONFIG_THREAD_INFO_IN_TASK
  	atomic_set(&tsk->stack_refcount, 1);
  #endif
ba14a194a   Andy Lutomirski   fork: Add generic...
472

164c33c6a   Salman Qazi   sched: Fix fork()...
473
  	if (err)
b235beea9   Linus Torvalds   Clarify naming of...
474
  		goto free_stack;
164c33c6a   Salman Qazi   sched: Fix fork()...
475

dbd952127   Kees Cook   seccomp: introduc...
476
477
478
479
480
481
482
483
484
  #ifdef CONFIG_SECCOMP
  	/*
  	 * We must handle setting up seccomp filters once we're under
  	 * the sighand lock in case orig has changed between now and
  	 * then. Until then, filter must be NULL to avoid messing up
  	 * the usage counts on the error path calling free_task.
  	 */
  	tsk->seccomp.filter = NULL;
  #endif
87bec58a5   Andrew Morton   revert "sched: Fi...
485
486
  
  	setup_thread_stack(tsk, orig);
8e7cac798   Avi Kivity   core: Fix user re...
487
  	clear_user_return_notifier(tsk);
f26f9aff6   Mike Galbraith   Sched: fix skip_c...
488
  	clear_tsk_need_resched(tsk);
d4311ff1a   Aaron Tomlin   init/main.c: Give...
489
  	set_task_stack_end_magic(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
490

0a4254058   Arjan van de Ven   [PATCH] Add the c...
491
492
493
  #ifdef CONFIG_CC_STACKPROTECTOR
  	tsk->stack_canary = get_random_int();
  #endif
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
494
495
496
497
498
  	/*
  	 * One for us, one for whoever does the "release_task()" (usually
  	 * parent)
  	 */
  	atomic_set(&tsk->usage, 2);
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
499
  #ifdef CONFIG_BLK_DEV_IO_TRACE
2056a782f   Jens Axboe   [PATCH] Block que...
500
  	tsk->btrace_seq = 0;
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
501
  #endif
a0aa7f68a   Jens Axboe   [PATCH] Don't inh...
502
  	tsk->splice_pipe = NULL;
5640f7685   Eric Dumazet   net: use a per ta...
503
  	tsk->task_frag.page = NULL;
093e5840a   Sebastian Andrzej Siewior   sched/core: Reset...
504
  	tsk->wake_q.next = NULL;
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
505

ba14a194a   Andy Lutomirski   fork: Add generic...
506
  	account_kernel_stack(tsk, 1);
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
507

5c9a8750a   Dmitry Vyukov   kernel: add kcov ...
508
  	kcov_task_init(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
509
  	return tsk;
61c4628b5   Suresh Siddha   x86, fpu: split F...
510

b235beea9   Linus Torvalds   Clarify naming of...
511
  free_stack:
ba14a194a   Andy Lutomirski   fork: Add generic...
512
  	free_thread_stack(tsk);
f19b9f74b   Akinobu Mita   fork: fix error h...
513
  free_tsk:
61c4628b5   Suresh Siddha   x86, fpu: split F...
514
515
  	free_task_struct(tsk);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
517
518
  }
  
  #ifdef CONFIG_MMU
0766f788e   Emese Revfy   latent_entropy: M...
519
520
  static __latent_entropy int dup_mmap(struct mm_struct *mm,
  					struct mm_struct *oldmm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
521
  {
297c5eee3   Linus Torvalds   mm: make the vma ...
522
  	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
523
524
525
  	struct rb_node **rb_link, *rb_parent;
  	int retval;
  	unsigned long charge;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
526

32cdba1e0   Oleg Nesterov   uprobes: Use perc...
527
  	uprobe_start_dup_mmap();
7c0512679   Michal Hocko   mm, fork: make du...
528
529
530
531
  	if (down_write_killable(&oldmm->mmap_sem)) {
  		retval = -EINTR;
  		goto fail_uprobe_end;
  	}
ec8c0446b   Ralf Baechle   [PATCH] Optimize ...
532
  	flush_cache_dup_mm(oldmm);
f8ac4ec9c   Oleg Nesterov   uprobes: Introduc...
533
  	uprobe_dup_mmap(oldmm, mm);
ad3394517   Ingo Molnar   [PATCH] lockdep: ...
534
535
536
537
  	/*
  	 * Not linked in yet - no deadlock potential:
  	 */
  	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
538

90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
539
540
  	/* No ordering required: file already has been exposed. */
  	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
4f7d46143   Vladimir Davydov   fork: copy mm's v...
541
  	mm->total_vm = oldmm->total_vm;
846383359   Konstantin Khlebnikov   mm: rework virtua...
542
  	mm->data_vm = oldmm->data_vm;
4f7d46143   Vladimir Davydov   fork: copy mm's v...
543
544
  	mm->exec_vm = oldmm->exec_vm;
  	mm->stack_vm = oldmm->stack_vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
545
546
547
  	rb_link = &mm->mm_rb.rb_node;
  	rb_parent = NULL;
  	pprev = &mm->mmap;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
548
549
550
  	retval = ksm_fork(mm, oldmm);
  	if (retval)
  		goto out;
ba76149f4   Andrea Arcangeli   thp: khugepaged
551
552
553
  	retval = khugepaged_fork(mm, oldmm);
  	if (retval)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554

297c5eee3   Linus Torvalds   mm: make the vma ...
555
  	prev = NULL;
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
556
  	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
558
559
  		struct file *file;
  
  		if (mpnt->vm_flags & VM_DONTCOPY) {
846383359   Konstantin Khlebnikov   mm: rework virtua...
560
  			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
562
563
564
  			continue;
  		}
  		charge = 0;
  		if (mpnt->vm_flags & VM_ACCOUNT) {
b2412b7fa   Huang Shijie   fork: use vma_pag...
565
  			unsigned long len = vma_pages(mpnt);
191c54244   Al Viro   mm: collapse secu...
566
  			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567
568
569
  				goto fail_nomem;
  			charge = len;
  		}
e94b17660   Christoph Lameter   [PATCH] slab: rem...
570
  		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
572
573
  		if (!tmp)
  			goto fail_nomem;
  		*tmp = *mpnt;
5beb49305   Rik van Riel   mm: change anon_v...
574
  		INIT_LIST_HEAD(&tmp->anon_vma_chain);
ef0855d33   Oleg Nesterov   mm: mempolicy: tu...
575
576
  		retval = vma_dup_policy(mpnt, tmp);
  		if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577
  			goto fail_nomem_policy;
a247c3a97   Andrea Arcangeli   rmap: fix walk du...
578
  		tmp->vm_mm = mm;
5beb49305   Rik van Riel   mm: change anon_v...
579
580
  		if (anon_vma_fork(tmp, mpnt))
  			goto fail_nomem_anon_vma_fork;
de60f5f10   Eric B Munson   mm: introduce VM_...
581
582
  		tmp->vm_flags &=
  			~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP);
297c5eee3   Linus Torvalds   mm: make the vma ...
583
  		tmp->vm_next = tmp->vm_prev = NULL;
745f234be   Andrea Arcangeli   userfaultfd: add ...
584
  		tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
585
586
  		file = tmp->vm_file;
  		if (file) {
496ad9aa8   Al Viro   new helper: file_...
587
  			struct inode *inode = file_inode(file);
b88ed2059   Hugh Dickins   fix mapping_writa...
588
  			struct address_space *mapping = file->f_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
589
590
591
  			get_file(file);
  			if (tmp->vm_flags & VM_DENYWRITE)
  				atomic_dec(&inode->i_writecount);
83cde9e8b   Davidlohr Bueso   mm: use new helpe...
592
  			i_mmap_lock_write(mapping);
b88ed2059   Hugh Dickins   fix mapping_writa...
593
  			if (tmp->vm_flags & VM_SHARED)
4bb5f5d93   David Herrmann   mm: allow drivers...
594
  				atomic_inc(&mapping->i_mmap_writable);
b88ed2059   Hugh Dickins   fix mapping_writa...
595
596
  			flush_dcache_mmap_lock(mapping);
  			/* insert tmp into the share list, just after mpnt */
27ba0644e   Kirill A. Shutemov   rmap: drop suppor...
597
598
  			vma_interval_tree_insert_after(tmp, mpnt,
  					&mapping->i_mmap);
b88ed2059   Hugh Dickins   fix mapping_writa...
599
  			flush_dcache_mmap_unlock(mapping);
83cde9e8b   Davidlohr Bueso   mm: use new helpe...
600
  			i_mmap_unlock_write(mapping);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
601
602
603
  		}
  
  		/*
a1e78772d   Mel Gorman   hugetlb: reserve ...
604
605
606
607
608
609
610
611
  		 * Clear hugetlb-related page reserves for children. This only
  		 * affects MAP_PRIVATE mappings. Faults generated by the child
  		 * are not guaranteed to succeed, even if read-only
  		 */
  		if (is_vm_hugetlb_page(tmp))
  			reset_vma_resv_huge_pages(tmp);
  
  		/*
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
612
  		 * Link in the new vma and copy the page table entries.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
613
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
614
615
  		*pprev = tmp;
  		pprev = &tmp->vm_next;
297c5eee3   Linus Torvalds   mm: make the vma ...
616
617
  		tmp->vm_prev = prev;
  		prev = tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
618
619
620
621
622
623
  
  		__vma_link_rb(mm, tmp, rb_link, rb_parent);
  		rb_link = &tmp->vm_rb.rb_right;
  		rb_parent = &tmp->vm_rb;
  
  		mm->map_count++;
0b0db14c5   Hugh Dickins   [PATCH] unpaged: ...
624
  		retval = copy_page_range(mm, oldmm, mpnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
625
626
627
628
629
630
631
  
  		if (tmp->vm_ops && tmp->vm_ops->open)
  			tmp->vm_ops->open(tmp);
  
  		if (retval)
  			goto out;
  	}
d6dd61c83   Jeremy Fitzhardinge   [PATCH] x86: PARA...
632
633
  	/* a new mm has just been created */
  	arch_dup_mmap(oldmm, mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
634
  	retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
635
  out:
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
636
  	up_write(&mm->mmap_sem);
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
637
  	flush_tlb_mm(oldmm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
638
  	up_write(&oldmm->mmap_sem);
7c0512679   Michal Hocko   mm, fork: make du...
639
  fail_uprobe_end:
32cdba1e0   Oleg Nesterov   uprobes: Use perc...
640
  	uprobe_end_dup_mmap();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
641
  	return retval;
5beb49305   Rik van Riel   mm: change anon_v...
642
  fail_nomem_anon_vma_fork:
ef0855d33   Oleg Nesterov   mm: mempolicy: tu...
643
  	mpol_put(vma_policy(tmp));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
644
645
646
647
648
649
650
  fail_nomem_policy:
  	kmem_cache_free(vm_area_cachep, tmp);
  fail_nomem:
  	retval = -ENOMEM;
  	vm_unacct_memory(charge);
  	goto out;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
651
  static inline int mm_alloc_pgd(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
652
653
654
655
656
657
  {
  	mm->pgd = pgd_alloc(mm);
  	if (unlikely(!mm->pgd))
  		return -ENOMEM;
  	return 0;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
658
  /* Counterpart of mm_alloc_pgd(): hand mm->pgd back through pgd_free(). */
  static inline void mm_free_pgd(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
659
  {
5e5419734   Benjamin Herrenschmidt   add mm argument t...
660
  	/* mm->pgd is left pointing at the freed directory; callers free the mm next. */
  	pgd_free(mm, mm->pgd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
661
662
  }
  #else
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
663
664
665
666
667
668
669
  static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
  {
  	struct file *exe_file;
  
  	/*
  	 * !CONFIG_MMU flavour: nothing is copied except the exe_file
  	 * reference, which is taken while holding oldmm's mmap_sem.
  	 */
  	down_write(&oldmm->mmap_sem);
  	exe_file = get_mm_exe_file(oldmm);
  	RCU_INIT_POINTER(mm->exe_file, exe_file);
  	up_write(&oldmm->mmap_sem);
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
670
671
672
  /*
   * Stubs for the #else branch of CONFIG_MMU: mm_alloc_pgd() always
   * evaluates to 0 (success) and mm_free_pgd() expands to nothing.
   */
  #define mm_alloc_pgd(mm)	(0)
  #define mm_free_pgd(mm)
  #endif /* CONFIG_MMU */
23ff44402   Daniel Walker   whitespace fixes:...
673
  /* Spinlock taken around removal of an mm from its mmlist (see __mmput()). */
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
674

e94b17660   Christoph Lameter   [PATCH] slab: rem...
675
  /* Allocate an mm_struct from mm_cachep; the memory is not initialised here. */
  #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
676
  /* Return an mm_struct to mm_cachep. */
  #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
4cb0e11b1   Hidehiro Kawai   coredump_filter: ...
677
678
679
680
681
682
683
684
685
686
687
  /* Initial mm->flags value used by mm_init() when current has no mm. */
  static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
  
  /*
   * Handler for the "coredump_filter=" boot parameter: parse the number
   * (base auto-detected), move it into the dump-filter bit range and
   * discard anything outside MMF_DUMP_FILTER_MASK.
   */
  static int __init coredump_filter_setup(char *s)
  {
  	unsigned long filter = simple_strtoul(s, NULL, 0);
  
  	filter <<= MMF_DUMP_FILTER_SHIFT;
  	default_dump_filter = filter & MMF_DUMP_FILTER_MASK;
  
  	return 1;
  }
  
  __setup("coredump_filter=", coredump_filter_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
688
  #include <linux/init_task.h>
858f09930   Alexey Dobriyan   aio: ifdef fields...
689
690
691
692
  static void mm_init_aio(struct mm_struct *mm)
  {
  	/* AIO state is only set up when CONFIG_AIO is enabled. */
  #if defined(CONFIG_AIO)
  	mm->ioctx_table = NULL;
  	spin_lock_init(&mm->ioctx_lock);
  #endif
  }
33144e842   Vladimir Davydov   kernel/fork.c: ma...
696
697
698
699
700
701
  static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  {
  	/* Record @p as the owner of @mm; a no-op without CONFIG_MEMCG. */
  #if defined(CONFIG_MEMCG)
  	mm->owner = p;
  #endif
  }
694a95fa6   Eric W. Biederman   mm: Add a user_ns...
702
703
  static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
  	struct user_namespace *user_ns)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
704
  {
41f727fde   Vladimir Davydov   fork/exec: cleanu...
705
706
707
  	mm->mmap = NULL;
  	mm->mm_rb = RB_ROOT;
  	mm->vmacache_seqnum = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
709
710
711
  	atomic_set(&mm->mm_users, 1);
  	atomic_set(&mm->mm_count, 1);
  	init_rwsem(&mm->mmap_sem);
  	INIT_LIST_HEAD(&mm->mmlist);
999d9fc16   Oleg Nesterov   coredump: move mm...
712
  	mm->core_state = NULL;
e1f56c89b   Kirill A. Shutemov   mm: convert mm->n...
713
  	atomic_long_set(&mm->nr_ptes, 0);
2d2f5119b   Kirill A. Shutemov   mm: do not use mm...
714
  	mm_nr_pmds_init(mm);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
715
716
  	mm->map_count = 0;
  	mm->locked_vm = 0;
ce65cefa5   Vladimir Davydov   fork: reset mm->p...
717
  	mm->pinned_vm = 0;
d559db086   KAMEZAWA Hiroyuki   mm: clean up mm_c...
718
  	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
719
  	spin_lock_init(&mm->page_table_lock);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
720
  	mm_init_cpumask(mm);
858f09930   Alexey Dobriyan   aio: ifdef fields...
721
  	mm_init_aio(mm);
cf475ad28   Balbir Singh   cgroups: add an o...
722
  	mm_init_owner(mm, p);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
723
  	mmu_notifier_mm_init(mm);
208414059   Rik van Riel   mm: fix TLB flush...
724
  	clear_tlb_flush_pending(mm);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
725
726
727
  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
  	mm->pmd_huge_pte = NULL;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
728

a0715cc22   Alex Thorlton   mm, thp: add VM_I...
729
730
731
732
733
  	if (current->mm) {
  		mm->flags = current->mm->flags & MMF_INIT_MASK;
  		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
  	} else {
  		mm->flags = default_dump_filter;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
734
  		mm->def_flags = 0;
a0715cc22   Alex Thorlton   mm, thp: add VM_I...
735
  	}
41f727fde   Vladimir Davydov   fork/exec: cleanu...
736
737
738
739
740
  	if (mm_alloc_pgd(mm))
  		goto fail_nopgd;
  
  	if (init_new_context(p, mm))
  		goto fail_nocontext;
78fb74669   Pavel Emelianov   Memory controller...
741

694a95fa6   Eric W. Biederman   mm: Add a user_ns...
742
  	mm->user_ns = get_user_ns(user_ns);
41f727fde   Vladimir Davydov   fork/exec: cleanu...
743
744
745
746
747
  	return mm;
  
  fail_nocontext:
  	mm_free_pgd(mm);
  fail_nopgd:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
748
749
750
  	free_mm(mm);
  	return NULL;
  }
c3f0327f8   Konstantin Khlebnikov   mm: add rss count...
751
752
753
754
755
756
757
758
759
760
761
762
  /*
   * Sanity-check an mm that is about to be freed: every rss counter, the
   * PTE page count and the PMD page count are expected to be zero, and a
   * KERN_ALERT message is logged for each one that is not.
   */
  static void check_mm(struct mm_struct *mm)
  {
  	int i;
  
  	for (i = 0; i < NR_MM_COUNTERS; i++) {
  		long x = atomic_long_read(&mm->rss_stat.count[i]);
  
  		/* The message must end in an escaped "\n", not a raw line break. */
  		if (unlikely(x))
  			printk(KERN_ALERT "BUG: Bad rss-counter state "
  					  "mm:%p idx:%d val:%ld\n", mm, i, x);
  	}
  
  	if (atomic_long_read(&mm->nr_ptes))
  		pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
  				atomic_long_read(&mm->nr_ptes));
  	if (mm_nr_pmds(mm))
  		pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
  				mm_nr_pmds(mm));
  
  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
  	/* mm_init() sets pmd_huge_pte to NULL; it must be NULL again here. */
  	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
776
777
778
  /*
   * Allocate and initialize an mm_struct.
   */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
779
  struct mm_struct *mm_alloc(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
  {
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
781
  	struct mm_struct *mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
782
783
  
  	mm = allocate_mm();
de03c72cf   KOSAKI Motohiro   mm: convert mm->c...
784
785
786
787
  	if (!mm)
  		return NULL;
  
  	memset(mm, 0, sizeof(*mm));
694a95fa6   Eric W. Biederman   mm: Add a user_ns...
788
  	return mm_init(mm, current, current_user_ns());
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
789
790
791
792
793
794
795
  }
  
  /*
   * Called when the last reference to the mm
   * is dropped: either by a lazy thread or by
   * mmput. Free the page directory and the mm.
   */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
796
  void __mmdrop(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
797
798
799
800
  {
  	/* init_mm must never be freed. */
  	BUG_ON(mm == &init_mm);
  	mm_free_pgd(mm);
  	destroy_context(mm);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
801
  	mmu_notifier_mm_destroy(mm);
c3f0327f8   Konstantin Khlebnikov   mm: add rss count...
802
  	/* Log any counters that are still non-zero. */
  	check_mm(mm);
694a95fa6   Eric W. Biederman   mm: Add a user_ns...
803
  	/* Drop the user namespace reference taken in mm_init(). */
  	put_user_ns(mm->user_ns);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
804
805
  	free_mm(mm);
  }
6d4e4c4fc   Avi Kivity   KVM: Disallow for...
806
  EXPORT_SYMBOL_GPL(__mmdrop);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
807

ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
808
809
810
811
812
813
814
815
816
  /*
   * Tear down an mm whose mm_users count has already reached zero
   * (asserted below), then give up one mm reference via mmdrop().
   */
  static inline void __mmput(struct mm_struct *mm)
  {
  	VM_BUG_ON(atomic_read(&mm->mm_users));
  
  	uprobe_clear_state(mm);
  	exit_aio(mm);
  	ksm_exit(mm);
  	khugepaged_exit(mm); /* must run before exit_mmap */
  	exit_mmap(mm);
6fcb52a56   Aaron Lu   thp: reduce usage...
817
  	mm_put_huge_zero_page(mm);
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
818
819
820
821
822
823
824
825
  	/* Passing NULL makes set_mm_exe_file() fput() the current exe_file. */
  	set_mm_exe_file(mm, NULL);
  	/* Unlink from the mmlist, under mmlist_lock, only if it was linked. */
  	if (!list_empty(&mm->mmlist)) {
  		spin_lock(&mmlist_lock);
  		list_del(&mm->mmlist);
  		spin_unlock(&mmlist_lock);
  	}
  	/* Release the module reference held for the binary format, if any. */
  	if (mm->binfmt)
  		module_put(mm->binfmt->module);
862e3073b   Michal Hocko   mm, oom: get rid ...
826
  	set_bit(MMF_OOM_SKIP, &mm->flags);
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
827
828
  	mmdrop(mm);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
830
831
832
833
  /*
   * Decrement the use count and release all resources for an mm.
   */
  void mmput(struct mm_struct *mm)
  {
0ae26f1b3   Andrew Morton   [PATCH] mmput() m...
834
  	might_sleep();
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
835
836
837
838
  	if (atomic_dec_and_test(&mm->mm_users))
  		__mmput(mm);
  }
  EXPORT_SYMBOL_GPL(mmput);
7ef949d77   Michal Hocko   mm: oom_reaper: r...
839
  #ifdef CONFIG_MMU
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
840
841
842
843
844
845
846
847
  static void mmput_async_fn(struct work_struct *work)
  {
  	/* The work item is embedded in the mm as async_put_work. */
  	__mmput(container_of(work, struct mm_struct, async_put_work));
  }
  
  void mmput_async(struct mm_struct *mm)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
848
  	if (atomic_dec_and_test(&mm->mm_users)) {
ec8d7c14e   Michal Hocko   mm, oom_reaper: d...
849
850
  		INIT_WORK(&mm->async_put_work, mmput_async_fn);
  		schedule_work(&mm->async_put_work);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
851
852
  	}
  }
7ef949d77   Michal Hocko   mm: oom_reaper: r...
853
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
854

90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
855
856
857
858
859
  /**
   * set_mm_exe_file - change a reference to the mm's executable file
   *
   * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
   *
6e399cd14   Davidlohr Bueso   prctl: avoid usin...
860
861
862
863
864
   * Main users are mmput() and sys_execve(). Callers prevent concurrent
   * invocations: in mmput() nobody alive left, in execve task is single
   * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
   * mm->exe_file, but does so without using set_mm_exe_file() in order
   * to avoid the need for any locks.
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
865
   */
386460138   Jiri Slaby   mm: extract exe_f...
866
867
  void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  {
6e399cd14   Davidlohr Bueso   prctl: avoid usin...
868
869
870
871
872
873
874
875
  	struct file *old_exe_file;
  
  	/*
  	 * It is safe to dereference the exe_file without RCU as
  	 * this function is only called if nobody else can access
  	 * this mm -- see comment above for justification.
  	 */
  	old_exe_file = rcu_dereference_raw(mm->exe_file);
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
876

386460138   Jiri Slaby   mm: extract exe_f...
877
878
  	if (new_exe_file)
  		get_file(new_exe_file);
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
879
880
881
  	rcu_assign_pointer(mm->exe_file, new_exe_file);
  	if (old_exe_file)
  		fput(old_exe_file);
386460138   Jiri Slaby   mm: extract exe_f...
882
  }
90f31d0ea   Konstantin Khlebnikov   mm: rcu-protected...
883
884
885
886
887
888
  /**
   * get_mm_exe_file - acquire a reference to the mm's executable file
   *
   * Returns %NULL if mm has no associated executable file.
   * User must release file via fput().
   */
386460138   Jiri Slaby   mm: extract exe_f...
889
890
891
  struct file *get_mm_exe_file(struct mm_struct *mm)
  {
  	struct file *exe_file;
  
  	rcu_read_lock();
  	exe_file = rcu_dereference(mm->exe_file);
  	if (exe_file) {
  		/* A failed get_file_rcu() is reported as "no exe file". */
  		if (!get_file_rcu(exe_file))
  			exe_file = NULL;
  	}
  	rcu_read_unlock();
  
  	return exe_file;
  }
11163348a   Davidlohr Bueso   oprofile: reduce ...
899
  EXPORT_SYMBOL(get_mm_exe_file);
386460138   Jiri Slaby   mm: extract exe_f...
900

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
901
  /**
cd81a9170   Mateusz Guzik   mm: introduce get...
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
   * get_task_exe_file - acquire a reference to the task's executable file
   *
   * Returns %NULL if task's mm (if any) has no associated executable file or
   * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
   * User must release file via fput().
   */
  struct file *get_task_exe_file(struct task_struct *task)
  {
  	struct file *exe_file = NULL;
  	struct mm_struct *mm;
  
  	task_lock(task);
  	mm = task->mm;
  	/* Tasks without an mm, and PF_KTHREAD tasks, yield NULL. */
  	if (mm && !(task->flags & PF_KTHREAD))
  		exe_file = get_mm_exe_file(mm);
  	task_unlock(task);
  
  	return exe_file;
  }
  EXPORT_SYMBOL(get_task_exe_file);
386460138   Jiri Slaby   mm: extract exe_f...
923

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
924
925
926
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
927
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
929
930
931
932
933
934
935
936
937
938
939
   * this kernel workthread has transiently adopted a user mm with use_mm,
   * to do its AIO) is not set and if so returns a reference to it, after
   * bumping up the use count.  User must release the mm via mmput()
   * after use.  Typically used by /proc and ptrace.
   */
  struct mm_struct *get_task_mm(struct task_struct *task)
  {
  	struct mm_struct *mm;
  
  	task_lock(task);
  	mm = task->mm;
  	if (mm) {
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
940
  		if (task->flags & PF_KTHREAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
941
942
943
944
945
946
947
948
  			mm = NULL;
  		else
  			atomic_inc(&mm->mm_users);
  	}
  	task_unlock(task);
  	return mm;
  }
  EXPORT_SYMBOL_GPL(get_task_mm);
8cdb878dc   Christopher Yeoh   Fix race in proce...
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
  struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  {
  	struct mm_struct *mm;
  	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
  
  	/* The wait for the mutex failed: report that error to the caller. */
  	if (err)
  		return ERR_PTR(err);
  
  	mm = get_task_mm(task);
  	/* No mm, or our own mm: no ptrace permission check is made. */
  	if (!mm || mm == current->mm)
  		goto unlock;
  
  	if (!ptrace_may_access(task, mode)) {
  		mmput(mm);
  		mm = ERR_PTR(-EACCES);
  	}
  
  unlock:
  	mutex_unlock(&task->signal->cred_guard_mutex);
  	return mm;
  }
57b59c4a1   Oleg Nesterov   coredump_wait: do...
968
  static void complete_vfork_done(struct task_struct *tsk)
c415c3b47   Oleg Nesterov   vfork: introduce ...
969
  {
d68b46fe1   Oleg Nesterov   vfork: make it ki...
970
  	struct completion *vfork;
c415c3b47   Oleg Nesterov   vfork: introduce ...
971

d68b46fe1   Oleg Nesterov   vfork: make it ki...
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
  	task_lock(tsk);
  	vfork = tsk->vfork_done;
  	if (likely(vfork)) {
  		tsk->vfork_done = NULL;
  		complete(vfork);
  	}
  	task_unlock(tsk);
  }
  
  static int wait_for_vfork_done(struct task_struct *child,
  				struct completion *vfork)
  {
  	int ret;
  
  	/* Killable wait for @vfork, bracketed by the freezer count calls. */
  	freezer_do_not_count();
  	ret = wait_for_completion_killable(vfork);
  	freezer_count();
  
  	/*
  	 * The wait was interrupted: make the child forget the completion,
  	 * under task_lock, so it will not complete it later.
  	 */
  	if (ret != 0) {
  		task_lock(child);
  		child->vfork_done = NULL;
  		task_unlock(child);
  	}
  
  	put_task_struct(child);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
  /* Please note the differences between mmput and mm_release.
   * mmput is called whenever we stop holding onto a mm_struct,
   * error, success, whatever.
   *
   * mm_release is called after a mm_struct has been removed
   * from the current process.
   *
   * This difference is important for error handling, when we
   * only half set up a mm_struct for a new process and need to restore
   * the old one.  Because we mmput the new mm_struct before
   * restoring the old one. . .
   * Eric Biederman 10 January 1998
   */
  void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  {
8141c7f3e   Linus Torvalds   Move "exit_robust...
1014
1015
  	/* Get rid of any futexes when releasing the mm */
  #ifdef CONFIG_FUTEX
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1016
  	if (unlikely(tsk->robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
1017
  		exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1018
1019
  		tsk->robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
1020
  #ifdef CONFIG_COMPAT
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1021
  	if (unlikely(tsk->compat_robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
1022
  		compat_exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
1023
1024
  		tsk->compat_robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
1025
  #endif
322a2c100   Thomas Gleixner   futex: Move exit_...
1026
1027
  	if (unlikely(!list_empty(&tsk->pi_state_list)))
  		exit_pi_state_list(tsk);
8141c7f3e   Linus Torvalds   Move "exit_robust...
1028
  #endif
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
1029
  	uprobe_free_utask(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1030
1031
  	/* Get rid of any cached register state */
  	deactivate_mm(tsk, mm);
fec1d0115   Roland McGrath   [PATCH] Disable C...
1032
  	/*
735f2770a   Michal Hocko   kernel/fork: fix ...
1033
1034
1035
  	 * Signal userspace if we're not exiting with a core dump
  	 * because we want to leave the value intact for debugging
  	 * purposes.
fec1d0115   Roland McGrath   [PATCH] Disable C...
1036
  	 */
9c8a8228d   Eric Dumazet   execve: must clea...
1037
  	if (tsk->clear_child_tid) {
735f2770a   Michal Hocko   kernel/fork: fix ...
1038
  		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
9c8a8228d   Eric Dumazet   execve: must clea...
1039
1040
1041
1042
1043
1044
1045
1046
1047
  		    atomic_read(&mm->mm_users) > 1) {
  			/*
  			 * We don't check the error code - if userspace has
  			 * not set up a proper pointer then tough luck.
  			 */
  			put_user(0, tsk->clear_child_tid);
  			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
  					1, NULL, NULL, 0);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1048
  		tsk->clear_child_tid = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1049
  	}
f7505d64f   Konstantin Khlebnikov   fork: call comple...
1050
1051
1052
1053
1054
1055
1056
  
  	/*
  	 * All done, finally we can wake up parent and return this mm to him.
  	 * Also kthread_stop() uses this completion for synchronization.
  	 */
  	if (tsk->vfork_done)
  		complete_vfork_done(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
  }
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1058
1059
1060
1061
  /*
   * Allocate a new mm structure and copy contents from the
   * mm structure of the passed in task structure.
   */
ff252c1fc   DaeSeok Youn   kernel/fork.c: ma...
1062
  static struct mm_struct *dup_mm(struct task_struct *tsk)
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1063
1064
1065
  {
  	struct mm_struct *mm, *oldmm = current->mm;
  	int err;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1066
1067
1068
1069
1070
  	mm = allocate_mm();
  	if (!mm)
  		goto fail_nomem;
  
  	memcpy(mm, oldmm, sizeof(*mm));
694a95fa6   Eric W. Biederman   mm: Add a user_ns...
1071
  	if (!mm_init(mm, tsk, mm->user_ns))
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1072
  		goto fail_nomem;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1073
1074
1075
1076
1077
1078
  	err = dup_mmap(mm, oldmm);
  	if (err)
  		goto free_pt;
  
  	mm->hiwater_rss = get_mm_rss(mm);
  	mm->hiwater_vm = mm->total_vm;
801460d0c   Hiroshi Shimamoto   task_struct clean...
1079
1080
  	if (mm->binfmt && !try_module_get(mm->binfmt->module))
  		goto free_pt;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1081
1082
1083
  	return mm;
  
  free_pt:
801460d0c   Hiroshi Shimamoto   task_struct clean...
1084
1085
  	/* don't put binfmt in mmput, we haven't got module yet */
  	mm->binfmt = NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1086
1087
1088
1089
  	mmput(mm);
  
  fail_nomem:
  	return NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1090
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1091
  static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1092
  {
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1093
  	struct mm_struct *mm, *oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
1095
1096
1097
  	int retval;
  
  	tsk->min_flt = tsk->maj_flt = 0;
  	tsk->nvcsw = tsk->nivcsw = 0;
17406b82d   Mandeep Singh Baines   softlockup: remov...
1098
1099
1100
  #ifdef CONFIG_DETECT_HUNG_TASK
  	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
  
  	tsk->mm = NULL;
  	tsk->active_mm = NULL;
  
  	/*
  	 * Are we cloning a kernel thread?
  	 *
  	 * We need to steal a active VM for that..
  	 */
  	oldmm = current->mm;
  	if (!oldmm)
  		return 0;
615d6e875   Davidlohr Bueso   mm: per-thread vm...
1113
1114
  	/* initialize the new vmacache entries */
  	vmacache_flush(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1115
1116
1117
  	if (clone_flags & CLONE_VM) {
  		atomic_inc(&oldmm->mm_users);
  		mm = oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1118
1119
1120
1121
  		goto good_mm;
  	}
  
  	retval = -ENOMEM;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1122
  	mm = dup_mm(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
1124
  	if (!mm)
  		goto fail_nomem;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
1126
1127
1128
  good_mm:
  	tsk->mm = mm;
  	tsk->active_mm = mm;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1129
1130
  fail_nomem:
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1131
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1132
  static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1133
  {
498052bba   Al Viro   New locking/refco...
1134
  	struct fs_struct *fs = current->fs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1135
  	if (clone_flags & CLONE_FS) {
498052bba   Al Viro   New locking/refco...
1136
  		/* tsk->fs is already what we want */
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1137
  		spin_lock(&fs->lock);
498052bba   Al Viro   New locking/refco...
1138
  		if (fs->in_exec) {
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1139
  			spin_unlock(&fs->lock);
498052bba   Al Viro   New locking/refco...
1140
1141
1142
  			return -EAGAIN;
  		}
  		fs->users++;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1143
  		spin_unlock(&fs->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1144
1145
  		return 0;
  	}
498052bba   Al Viro   New locking/refco...
1146
  	tsk->fs = copy_fs_struct(fs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1147
1148
1149
1150
  	if (!tsk->fs)
  		return -ENOMEM;
  	return 0;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1151
  static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
a016f3389   JANAK DESAI   [PATCH] unshare s...
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
  {
  	struct files_struct *oldf, *newf;
  	int error = 0;
  
  	/*
  	 * A background process may not have any files ...
  	 */
  	oldf = current->files;
  	if (!oldf)
  		goto out;
  
  	if (clone_flags & CLONE_FILES) {
  		atomic_inc(&oldf->count);
  		goto out;
  	}
a016f3389   JANAK DESAI   [PATCH] unshare s...
1167
1168
1169
1170
1171
1172
1173
1174
1175
  	newf = dup_fd(oldf, &error);
  	if (!newf)
  		goto out;
  
  	tsk->files = newf;
  	error = 0;
  out:
  	return error;
  }
fadad878c   Jens Axboe   kernel: add CLONE...
1176
  static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
  {
  #ifdef CONFIG_BLOCK
  	struct io_context *parent_ioc = current->io_context;
  	struct io_context *child_ioc;
  
  	if (!parent_ioc)
  		return 0;
  
  	/* Share io context with parent, if CLONE_IO is set */
  	if (clone_flags & CLONE_IO) {
  		ioc_task_link(parent_ioc);
  		tsk->io_context = parent_ioc;
  		return 0;
  	}
  
  	/* Not sharing: carry over the parent's ioprio when it is valid. */
  	if (ioprio_valid(parent_ioc->ioprio)) {
  		child_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
  		if (unlikely(!child_ioc))
  			return -ENOMEM;
  
  		child_ioc->ioprio = parent_ioc->ioprio;
  		put_io_context(child_ioc);
  	}
  #endif
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1200
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1201
1202
  {
  	struct sighand_struct *sig;
60348802e   Zhaolei   fork.c: cleanup f...
1203
  	if (clone_flags & CLONE_SIGHAND) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1204
1205
1206
1207
  		atomic_inc(&current->sighand->count);
  		return 0;
  	}
  	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
e56d09031   Ingo Molnar   [PATCH] RCU signa...
1208
  	rcu_assign_pointer(tsk->sighand, sig);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1209
1210
  	if (!sig)
  		return -ENOMEM;
9d7fb0427   Peter Zijlstra   sched/cputime: Gu...
1211

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1212
1213
1214
1215
  	atomic_set(&sig->count, 1);
  	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
  	return 0;
  }
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1216
  void __cleanup_sighand(struct sighand_struct *sighand)
c81addc9d   Oleg Nesterov   [PATCH] rename __...
1217
  {
d80e731ec   Oleg Nesterov   epoll: introduce ...
1218
1219
  	if (atomic_dec_and_test(&sighand->count)) {
  		signalfd_cleanup(sighand);
392809b25   Oleg Nesterov   signal: Document ...
1220
1221
1222
1223
  		/*
  		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
  		 * without an RCU grace period, see __lock_task_sighand().
  		 */
c81addc9d   Oleg Nesterov   [PATCH] rename __...
1224
  		kmem_cache_free(sighand_cachep, sighand);
d80e731ec   Oleg Nesterov   epoll: introduce ...
1225
  	}
c81addc9d   Oleg Nesterov   [PATCH] rename __...
1226
  }
f06febc96   Frank Mayhar   timers: fix itime...
1227
1228
1229
1230
1231
  /*
   * Initialize POSIX timer handling for a thread group.
   */
  static void posix_cpu_timers_init_group(struct signal_struct *sig)
  {
78d7d407b   Jiri Slaby   kernel core: use ...
1232
  	unsigned long cpu_limit;
316c1608d   Jason Low   sched, timer: Con...
1233
  	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
78d7d407b   Jiri Slaby   kernel core: use ...
1234
1235
  	if (cpu_limit != RLIM_INFINITY) {
  		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
d5c373eb5   Jason Low   posix_cpu_timer: ...
1236
  		sig->cputimer.running = true;
6279a751f   Oleg Nesterov   posix-timers: fix...
1237
  	}
f06febc96   Frank Mayhar   timers: fix itime...
1238
1239
1240
1241
1242
  	/* The timer lists. */
  	INIT_LIST_HEAD(&sig->cpu_timers[0]);
  	INIT_LIST_HEAD(&sig->cpu_timers[1]);
  	INIT_LIST_HEAD(&sig->cpu_timers[2]);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1243
  static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1244
1245
  {
  	struct signal_struct *sig;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1246

4ab6c0833   Oleg Nesterov   clone(): fix race...
1247
  	if (clone_flags & CLONE_THREAD)
490dea45d   Peter Zijlstra   itimers: remove t...
1248
  		return 0;
490dea45d   Peter Zijlstra   itimers: remove t...
1249

a56704ef6   Veaceslav Falico   copy_signal() cle...
1250
  	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1251
1252
1253
  	tsk->signal = sig;
  	if (!sig)
  		return -ENOMEM;
b3ac022cb   Oleg Nesterov   proc: turn signal...
1254
  	sig->nr_threads = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
  	atomic_set(&sig->live, 1);
b3ac022cb   Oleg Nesterov   proc: turn signal...
1256
  	atomic_set(&sig->sigcnt, 1);
0c740d0af   Oleg Nesterov   introduce for_eac...
1257
1258
1259
1260
  
  	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
  	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
  	tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1261
  	init_waitqueue_head(&sig->wait_chldexit);
db51aeccd   Oleg Nesterov   signals: microopt...
1262
  	sig->curr_target = tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1263
1264
  	init_sigpending(&sig->shared_pending);
  	INIT_LIST_HEAD(&sig->posix_timers);
e78c34967   Rik van Riel   time, signal: Pro...
1265
  	seqlock_init(&sig->stats_lock);
9d7fb0427   Peter Zijlstra   sched/cputime: Gu...
1266
  	prev_cputime_init(&sig->prev_cputime);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267

c9cb2e3d7   Thomas Gleixner   [PATCH] hrtimers:...
1268
  	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1269
  	sig->real_timer.function = it_real_fn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1270

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1271
1272
1273
  	task_lock(current->group_leader);
  	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
  	task_unlock(current->group_leader);
6279a751f   Oleg Nesterov   posix-timers: fix...
1274
  	posix_cpu_timers_init_group(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
1275
  	tty_audit_fork(sig);
5091faa44   Mike Galbraith   sched: Add 'autog...
1276
  	sched_autogroup_fork(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
1277

a63d83f42   David Rientjes   oom: badness heur...
1278
  	sig->oom_score_adj = current->signal->oom_score_adj;
dabb16f63   Mandeep Singh Baines   oom: allow a non-...
1279
  	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
1280

ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
1281
1282
  	sig->has_child_subreaper = current->signal->has_child_subreaper ||
  				   current->signal->is_child_subreaper;
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1283
  	mutex_init(&sig->cred_guard_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284
1285
  	return 0;
  }
dbd952127   Kees Cook   seccomp: introduc...
1286
1287
1288
1289
1290
1291
1292
1293
1294
  /*
   * Propagate the current task's seccomp state to the new task @p.
   * Compiles to an empty function without CONFIG_SECCOMP.
   */
  static void copy_seccomp(struct task_struct *p)
  {
  #ifdef CONFIG_SECCOMP
  	struct task_struct *parent = current;

  	/*
  	 * The caller must hold sighand->lock, which all threads in the
  	 * group share. cred_guard_mutex is not required: the new task
  	 * is not running yet and therefore cannot race with exec.
  	 */
  	assert_spin_locked(&parent->sighand->siglock);

  	/* Take a reference for the new filter user, then assign it. */
  	get_seccomp_filter(parent);
  	p->seccomp = parent->seccomp;

  	/*
  	 * no_new_privs may have been set after the task_struct was
  	 * duplicated but before the sighand lock was taken. Mirror it
  	 * explicitly so that seccomp state and nnp stay in sync.
  	 */
  	if (task_no_new_privs(parent))
  		task_set_no_new_privs(p);

  	/*
  	 * Likewise, the parent may have gained a seccomp mode after the
  	 * thread flags were copied and before the sighand lock was held.
  	 * In that case the seccomp thread flag is enabled by hand.
  	 */
  	if (p->seccomp.mode == SECCOMP_MODE_DISABLED)
  		return;
  	set_tsk_thread_flag(p, TIF_SECCOMP);
  #endif
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
1318
  /*
   * set_tid_address(): record @tidptr as the calling task's
   * clear_child_tid pointer and return task_pid_vnr() of the caller.
   */
  SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
  {
  	struct task_struct *me = current;

  	me->clear_child_tid = tidptr;

  	return task_pid_vnr(me);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1323
  static void rt_mutex_init_task(struct task_struct *p)
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1324
  {
1d6154825   Thomas Gleixner   sched: Convert pi...
1325
  	raw_spin_lock_init(&p->pi_lock);
e29e175b0   Zilvinas Valinskas   [PATCH] initialis...
1326
  #ifdef CONFIG_RT_MUTEXES
fb00aca47   Peter Zijlstra   rtmutex: Turn the...
1327
1328
  	p->pi_waiters = RB_ROOT;
  	p->pi_waiters_leftmost = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1329
  	p->pi_blocked_on = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1330
1331
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1332
  /*
   * Set up the POSIX CPU timer state of a single task: every expiry
   * value starts at zero and the three per-task timer lists are empty.
   */
  static void posix_cpu_timers_init(struct task_struct *tsk)
  {
  	int i;

  	tsk->cputime_expires.prof_exp = 0;
  	tsk->cputime_expires.virt_exp = 0;
  	tsk->cputime_expires.sched_exp = 0;

  	/* The timer lists: indices 0, 1 and 2. */
  	for (i = 0; i < 3; i++)
  		INIT_LIST_HEAD(&tsk->cpu_timers[i]);
  }
819077398   Oleg Nesterov   kernel/fork.c:cop...
1344
1345
1346
1347
1348
  static inline void
  init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
  {
  	 task->pids[type].pid = pid;
  }
f06febc96   Frank Mayhar   timers: fix itime...
1349
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1350
1351
1352
1353
1354
1355
1356
   * This creates a new process as a copy of the old one,
   * but does not actually start it yet.
   *
   * It copies the registers, and all the appropriate
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
0766f788e   Emese Revfy   latent_entropy: M...
1357
1358
  static __latent_entropy struct task_struct *copy_process(
  					unsigned long clone_flags,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1359
  					unsigned long stack_start,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1360
  					unsigned long stack_size,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1361
  					int __user *child_tidptr,
09a05394f   Roland McGrath   tracehook: clone
1362
  					struct pid *pid,
3033f14ab   Josh Triplett   clone: support pa...
1363
  					int trace,
725fc629f   Andi Kleen   kernek/fork.c: al...
1364
1365
  					unsigned long tls,
  					int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1366
1367
  {
  	int retval;
a24efe62d   Mariusz Kozlowski   kernel/fork.c: re...
1368
  	struct task_struct *p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1369
1370
1371
  
  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  		return ERR_PTR(-EINVAL);
e66eded83   Eric W. Biederman   userns: Don't all...
1372
1373
  	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
  		return ERR_PTR(-EINVAL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
  	/*
  	 * Thread groups must share signals as well, and detached threads
  	 * can only be started up within the thread group.
  	 */
  	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Shared signal handlers imply shared VM. By way of the above,
  	 * thread groups also imply shared VM. Blocking this case allows
  	 * for various simplifications in other code.
  	 */
  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
  		return ERR_PTR(-EINVAL);
123be07b0   Sukadev Bhattiprolu   fork(): disable C...
1388
1389
1390
1391
1392
1393
1394
1395
1396
  	/*
  	 * Siblings of global init remain as zombies on exit since they are
  	 * not reaped by their parent (swapper). To solve this and to avoid
  	 * multi-rooted process trees, prevent global and container-inits
  	 * from creating siblings.
  	 */
  	if ((clone_flags & CLONE_PARENT) &&
  				current->signal->flags & SIGNAL_UNKILLABLE)
  		return ERR_PTR(-EINVAL);
8382fcac1   Eric W. Biederman   pidns: Outlaw thr...
1397
  	/*
40a0d32d1   Oleg Nesterov   fork: unify and t...
1398
  	 * If the new process will be in a different pid or user namespace
faf00da54   Eric W. Biederman   userns,pidns: For...
1399
  	 * do not allow it to share a thread group with the forking task.
8382fcac1   Eric W. Biederman   pidns: Outlaw thr...
1400
  	 */
faf00da54   Eric W. Biederman   userns,pidns: For...
1401
  	if (clone_flags & CLONE_THREAD) {
40a0d32d1   Oleg Nesterov   fork: unify and t...
1402
1403
1404
1405
1406
  		if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
  		    (task_active_pid_ns(current) !=
  				current->nsproxy->pid_ns_for_children))
  			return ERR_PTR(-EINVAL);
  	}
8382fcac1   Eric W. Biederman   pidns: Outlaw thr...
1407

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1408
1409
1410
1411
1412
  	retval = security_task_create(clone_flags);
  	if (retval)
  		goto fork_out;
  
  	retval = -ENOMEM;
725fc629f   Andi Kleen   kernek/fork.c: al...
1413
  	p = dup_task_struct(current, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1414
1415
  	if (!p)
  		goto fork_out;
f7e8b616e   Steven Rostedt   function-graph: m...
1416
  	ftrace_graph_init_task(p);
bea493a03   Peter Zijlstra   [PATCH] rt-mutex:...
1417
  	rt_mutex_init_task(p);
d12c1a379   Ingo Molnar   lockdep: fix kern...
1418
  #ifdef CONFIG_PROVE_LOCKING
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1419
1420
1421
  	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
  	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1422
  	retval = -EAGAIN;
3b11a1dec   David Howells   CRED: Differentia...
1423
  	if (atomic_read(&p->real_cred->user->processes) >=
78d7d407b   Jiri Slaby   kernel core: use ...
1424
  			task_rlimit(p, RLIMIT_NPROC)) {
b57922b6c   Eric Paris   fork: reorder per...
1425
1426
  		if (p->real_cred->user != INIT_USER &&
  		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1427
1428
  			goto bad_fork_free;
  	}
72fa59970   Vasiliy Kulikov   move RLIMIT_NPROC...
1429
  	current->flags &= ~PF_NPROC_EXCEEDED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1430

f1752eec6   David Howells   CRED: Detach the ...
1431
1432
1433
  	retval = copy_creds(p, clone_flags);
  	if (retval < 0)
  		goto bad_fork_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1434
1435
1436
1437
1438
1439
  
  	/*
  	 * If multiple threads are within copy_process(), then this check
  	 * triggers too late. This doesn't hurt, the check is only there
  	 * to stop root fork bombs.
  	 */
04ec93fe9   Li Zefan   fork.c: fix NULL ...
1440
  	retval = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1441
1442
  	if (nr_threads >= max_threads)
  		goto bad_fork_cleanup_count;
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
1443
  	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
514ddb446   David Rientjes   fork: collapse co...
1444
1445
  	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
  	p->flags |= PF_FORKNOEXEC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1446
1447
  	INIT_LIST_HEAD(&p->children);
  	INIT_LIST_HEAD(&p->sibling);
f41d911f8   Paul E. McKenney   rcu: Merge preemp...
1448
  	rcu_copy_process(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1449
1450
  	p->vfork_done = NULL;
  	spin_lock_init(&p->alloc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1451

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1452
  	init_sigpending(&p->pending);
648616343   Martin Schwidefsky   [S390] cputime: a...
1453
1454
  	p->utime = p->stime = p->gtime = 0;
  	p->utimescaled = p->stimescaled = 0;
9d7fb0427   Peter Zijlstra   sched/cputime: Gu...
1455
  	prev_cputime_init(&p->prev_cputime);
6a61671bb   Frederic Weisbecker   cputime: Safely r...
1456
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
b7ce2277f   Frederic Weisbecker   sched/cputime: Co...
1457
  	seqcount_init(&p->vtime_seqcount);
6a61671bb   Frederic Weisbecker   cputime: Safely r...
1458
  	p->vtime_snap = 0;
7098c1eac   Frederic Weisbecker   sched/cputime: Cl...
1459
  	p->vtime_snap_whence = VTIME_INACTIVE;
6a61671bb   Frederic Weisbecker   cputime: Safely r...
1460
  #endif
a3a2e76c7   KAMEZAWA Hiroyuki   mm: avoid null-po...
1461
1462
1463
  #if defined(SPLIT_RSS_COUNTING)
  	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
  #endif
172ba844a   Balbir Singh   sched: update del...
1464

6976675d9   Arjan van de Ven   hrtimer: create a...
1465
  	p->default_timer_slack_ns = current->timer_slack_ns;
5995477ab   Andrea Righi   task IO accountin...
1466
  	task_io_accounting_init(&p->ioac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1467
  	acct_clear_integrals(p);
f06febc96   Frank Mayhar   timers: fix itime...
1468
  	posix_cpu_timers_init(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1469

ccbf62d8a   Thomas Gleixner   sched: Make task-...
1470
  	p->start_time = ktime_get_ns();
57e0be041   Thomas Gleixner   sched: Make task-...
1471
  	p->real_start_time = ktime_get_boot_ns();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1472
  	p->io_context = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1473
  	p->audit_context = NULL;
b4f48b636   Paul Menage   Task Control Grou...
1474
  	cgroup_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1475
  #ifdef CONFIG_NUMA
846a16bf0   Lee Schermerhorn   mempolicy: rename...
1476
  	p->mempolicy = mpol_dup(p->mempolicy);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1477
1478
1479
  	if (IS_ERR(p->mempolicy)) {
  		retval = PTR_ERR(p->mempolicy);
  		p->mempolicy = NULL;
e8604cb43   Li Zefan   cgroup: fix spuri...
1480
  		goto bad_fork_cleanup_threadgroup_lock;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1481
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1482
  #endif
778d3b0ff   Michal Hocko   cpusets: randomiz...
1483
1484
1485
  #ifdef CONFIG_CPUSETS
  	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
  	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1486
  	seqcount_init(&p->mems_allowed_seq);
778d3b0ff   Michal Hocko   cpusets: randomiz...
1487
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
  #ifdef CONFIG_TRACE_IRQFLAGS
  	p->irq_events = 0;
  	p->hardirqs_enabled = 0;
  	p->hardirq_enable_ip = 0;
  	p->hardirq_enable_event = 0;
  	p->hardirq_disable_ip = _THIS_IP_;
  	p->hardirq_disable_event = 0;
  	p->softirqs_enabled = 1;
  	p->softirq_enable_ip = _THIS_IP_;
  	p->softirq_enable_event = 0;
  	p->softirq_disable_ip = 0;
  	p->softirq_disable_event = 0;
  	p->hardirq_context = 0;
  	p->softirq_context = 0;
  #endif
8bcbde548   David Hildenbrand   sched/preempt, mm...
1503
1504
  
  	p->pagefault_disabled = 0;
fbb9ce953   Ingo Molnar   [PATCH] lockdep: ...
1505
1506
1507
1508
1509
  #ifdef CONFIG_LOCKDEP
  	p->lockdep_depth = 0; /* no locks held yet */
  	p->curr_chain_key = 0;
  	p->lockdep_recursion = 0;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1510

408894ee4   Ingo Molnar   [PATCH] mutex sub...
1511
1512
1513
  #ifdef CONFIG_DEBUG_MUTEXES
  	p->blocked_on = NULL; /* not blocked yet */
  #endif
cafe56359   Kent Overstreet   bcache: A block l...
1514
1515
1516
1517
  #ifdef CONFIG_BCACHE
  	p->sequential_io	= 0;
  	p->sequential_io_avg	= 0;
  #endif
0f4814065   Markus Metzger   x86, ptrace: add ...
1518

3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1519
  	/* Perform scheduler related setup. Assign this task to a CPU. */
aab03e05e   Dario Faggioli   sched/deadline: A...
1520
1521
1522
  	retval = sched_fork(clone_flags, p);
  	if (retval)
  		goto bad_fork_cleanup_policy;
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1523

cdd6c482c   Ingo Molnar   perf: Do the big ...
1524
  	retval = perf_event_init_task(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1525
1526
  	if (retval)
  		goto bad_fork_cleanup_policy;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1527
1528
  	retval = audit_alloc(p);
  	if (retval)
6c72e3501   Peter Zijlstra   perf: fix perf bu...
1529
  		goto bad_fork_cleanup_perf;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1530
  	/* copy all the process information */
ab602f799   Jack Miller   shm: make exit_sh...
1531
  	shm_init_task(p);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1532
1533
  	retval = copy_semundo(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1534
  		goto bad_fork_cleanup_audit;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1535
1536
  	retval = copy_files(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1537
  		goto bad_fork_cleanup_semundo;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1538
1539
  	retval = copy_fs(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1540
  		goto bad_fork_cleanup_files;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1541
1542
  	retval = copy_sighand(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1543
  		goto bad_fork_cleanup_fs;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1544
1545
  	retval = copy_signal(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1546
  		goto bad_fork_cleanup_sighand;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1547
1548
  	retval = copy_mm(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1549
  		goto bad_fork_cleanup_signal;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1550
1551
  	retval = copy_namespaces(clone_flags, p);
  	if (retval)
d84f4f992   David Howells   CRED: Inaugurate ...
1552
  		goto bad_fork_cleanup_mm;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1553
1554
  	retval = copy_io(clone_flags, p);
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1555
  		goto bad_fork_cleanup_namespaces;
3033f14ab   Josh Triplett   clone: support pa...
1556
  	retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1557
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1558
  		goto bad_fork_cleanup_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1559

425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1560
  	if (pid != &init_struct_pid) {
c2b1df2eb   Andy Lutomirski   Rename nsproxy.pi...
1561
  		pid = alloc_pid(p->nsproxy->pid_ns_for_children);
35f71bc0a   Michal Hocko   fork: report pid ...
1562
1563
  		if (IS_ERR(pid)) {
  			retval = PTR_ERR(pid);
0740aa5f6   Jiri Slaby   fork: free thread...
1564
  			goto bad_fork_cleanup_thread;
35f71bc0a   Michal Hocko   fork: report pid ...
1565
  		}
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1566
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1567
1568
1569
1570
  	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
  	/*
  	 * Clear TID on mm_release()?
  	 */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1571
  	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
73c101011   Jens Axboe   block: initial pa...
1572
1573
1574
  #ifdef CONFIG_BLOCK
  	p->plug = NULL;
  #endif
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1575
  #ifdef CONFIG_FUTEX
8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1576
1577
1578
1579
  	p->robust_list = NULL;
  #ifdef CONFIG_COMPAT
  	p->compat_robust_list = NULL;
  #endif
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1580
1581
  	INIT_LIST_HEAD(&p->pi_state_list);
  	p->pi_state_cache = NULL;
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1582
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1583
  	/*
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1584
1585
1586
  	 * sigaltstack should be cleared when sharing the same VM
  	 */
  	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
2a7421383   Stas Sergeev   signals/sigaltsta...
1587
  		sas_ss_reset(p);
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1588
1589
  
  	/*
6580807da   Oleg Nesterov   ptrace: copy_proc...
1590
1591
  	 * Syscall tracing and stepping should be turned off in the
  	 * child regardless of CLONE_PTRACE.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1592
  	 */
6580807da   Oleg Nesterov   ptrace: copy_proc...
1593
  	user_disable_single_step(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1594
  	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
ed75e8d58   Laurent Vivier   [PATCH] UML Suppo...
1595
1596
1597
  #ifdef TIF_SYSCALL_EMU
  	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
  #endif
9745512ce   Arjan van de Ven   sched: latencytop...
1598
  	clear_all_latency_tracing(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1599

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1600
  	/* ok, now we should be set up.. */
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1601
1602
  	p->pid = pid_nr(pid);
  	if (clone_flags & CLONE_THREAD) {
5f8aadd8b   Oleg Nesterov   CLONE_PARENT shou...
1603
  		p->exit_signal = -1;
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
  		p->group_leader = current->group_leader;
  		p->tgid = current->tgid;
  	} else {
  		if (clone_flags & CLONE_PARENT)
  			p->exit_signal = current->group_leader->exit_signal;
  		else
  			p->exit_signal = (clone_flags & CSIGNAL);
  		p->group_leader = p;
  		p->tgid = p->pid;
  	}
5f8aadd8b   Oleg Nesterov   CLONE_PARENT shou...
1614

9d823e8f6   Wu Fengguang   writeback: per ta...
1615
1616
  	p->nr_dirtied = 0;
  	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
83712358b   Wu Fengguang   writeback: dirty ...
1617
  	p->dirty_paused_when = 0;
9d823e8f6   Wu Fengguang   writeback: per ta...
1618

bb8cbbfee   Oleg Nesterov   tasks/fork: Remov...
1619
  	p->pdeath_signal = 0;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1620
  	INIT_LIST_HEAD(&p->thread_group);
158e1645e   Al Viro   trim task_work: g...
1621
  	p->task_works = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1622

568ac8882   Balbir Singh   cgroup: reduce re...
1623
  	threadgroup_change_begin(current);
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1624
  	/*
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1625
1626
1627
1628
1629
  	 * Ensure that the cgroup subsystem policies allow the new process to be
  	 * forked. It should be noted the the new process's css_set can be changed
  	 * between here and cgroup_post_fork() if an organisation operation is in
  	 * progress.
  	 */
b53202e63   Oleg Nesterov   cgroup: kill cgrp...
1630
  	retval = cgroup_can_fork(p);
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1631
1632
1633
1634
  	if (retval)
  		goto bad_fork_free_pid;
  
  	/*
18c830df7   Oleg Nesterov   kernel/fork.c:cop...
1635
1636
1637
  	 * Make it visible to the rest of the system, but dont wake it up yet.
  	 * Need tasklist lock for parent etc handling!
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1638
  	write_lock_irq(&tasklist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1639
  	/* CLONE_PARENT re-uses the old parent */
2d5516cbb   Oleg Nesterov   copy_process: fix...
1640
  	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1641
  		p->real_parent = current->real_parent;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1642
1643
  		p->parent_exec_id = current->parent_exec_id;
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1644
  		p->real_parent = current;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1645
1646
  		p->parent_exec_id = current->self_exec_id;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1647

3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1648
  	spin_lock(&current->sighand->siglock);
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1649
1650
  
  	/*
dbd952127   Kees Cook   seccomp: introduc...
1651
1652
1653
1654
1655
1656
  	 * Copy seccomp details explicitly here, in case they were changed
  	 * before holding sighand lock.
  	 */
  	copy_seccomp(p);
  
  	/*
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1657
1658
1659
1660
1661
1662
  	 * Process group and session signals need to be delivered to just the
  	 * parent before the fork or both the parent and the child after the
  	 * fork. Restart if a signal comes in before we add the new process to
  	 * it's process group.
  	 * A fatal signal pending means that current will exit, so the new
  	 * thread can't slip out of an OOM kill (or normal SIGKILL).
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1663
  	*/
23ff44402   Daniel Walker   whitespace fixes:...
1664
  	recalc_sigpending();
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1665
1666
1667
1668
  	if (signal_pending(current)) {
  		spin_unlock(&current->sighand->siglock);
  		write_unlock_irq(&tasklist_lock);
  		retval = -ERESTARTNOINTR;
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1669
  		goto bad_fork_cancel_cgroup;
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1670
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1671
  	if (likely(p->pid)) {
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1672
  		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1673

819077398   Oleg Nesterov   kernel/fork.c:cop...
1674
  		init_task_pid(p, PIDTYPE_PID, pid);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1675
  		if (thread_group_leader(p)) {
819077398   Oleg Nesterov   kernel/fork.c:cop...
1676
1677
  			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
  			init_task_pid(p, PIDTYPE_SID, task_session(current));
1c4042c29   Eric W. Biederman   pidns: Consolidat...
1678
  			if (is_child_reaper(pid)) {
17cf22c33   Eric W. Biederman   pidns: Use task_a...
1679
  				ns_of_pid(pid)->child_reaper = p;
1c4042c29   Eric W. Biederman   pidns: Consolidat...
1680
1681
  				p->signal->flags |= SIGNAL_UNKILLABLE;
  			}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1682

fea9d1755   Oleg Nesterov   ITIMER_REAL: conv...
1683
  			p->signal->leader_pid = pid;
9c9f4ded9   Alan Cox   tty: Add a kref c...
1684
  			p->signal->tty = tty_kref_get(current->signal->tty);
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1685
  			list_add_tail(&p->sibling, &p->real_parent->children);
5e85d4abe   Eric W. Biederman   [PATCH] task: Mak...
1686
  			list_add_tail_rcu(&p->tasks, &init_task.tasks);
819077398   Oleg Nesterov   kernel/fork.c:cop...
1687
1688
  			attach_pid(p, PIDTYPE_PGID);
  			attach_pid(p, PIDTYPE_SID);
909ea9646   Christoph Lameter   core: Replace __g...
1689
  			__this_cpu_inc(process_counts);
80628ca06   Oleg Nesterov   kernel/fork.c:cop...
1690
1691
1692
1693
  		} else {
  			current->signal->nr_threads++;
  			atomic_inc(&current->signal->live);
  			atomic_inc(&current->signal->sigcnt);
80628ca06   Oleg Nesterov   kernel/fork.c:cop...
1694
1695
  			list_add_tail_rcu(&p->thread_group,
  					  &p->group_leader->thread_group);
0c740d0af   Oleg Nesterov   introduce for_eac...
1696
1697
  			list_add_tail_rcu(&p->thread_node,
  					  &p->signal->thread_head);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1698
  		}
819077398   Oleg Nesterov   kernel/fork.c:cop...
1699
  		attach_pid(p, PIDTYPE_PID);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1700
  		nr_threads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1701
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1702
  	total_forks++;
3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1703
  	spin_unlock(&current->sighand->siglock);
4af4206be   Oleg Nesterov   tracing: Fix sysc...
1704
  	syscall_tracepoint_update(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1705
  	write_unlock_irq(&tasklist_lock);
4af4206be   Oleg Nesterov   tracing: Fix sysc...
1706

c13cf856c   Andrew Morton   [PATCH] fork.c: p...
1707
  	proc_fork_connector(p);
b53202e63   Oleg Nesterov   cgroup: kill cgrp...
1708
  	cgroup_post_fork(p);
c9e75f049   Oleg Nesterov   cgroup: pids: fix...
1709
  	threadgroup_change_end(current);
cdd6c482c   Ingo Molnar   perf: Do the big ...
1710
  	perf_event_fork(p);
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
1711
1712
  
  	trace_task_newtask(p, clone_flags);
3ab679661   Oleg Nesterov   uprobes: Teach up...
1713
  	uprobe_copy_process(p, clone_flags);
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
1714

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1715
  	return p;
7e47682ea   Aleksa Sarai   cgroup: allow a c...
1716
  bad_fork_cancel_cgroup:
b53202e63   Oleg Nesterov   cgroup: kill cgrp...
1717
  	cgroup_cancel_fork(p);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1718
  bad_fork_free_pid:
568ac8882   Balbir Singh   cgroup: reduce re...
1719
  	threadgroup_change_end(current);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1720
1721
  	if (pid != &init_struct_pid)
  		free_pid(pid);
0740aa5f6   Jiri Slaby   fork: free thread...
1722
1723
  bad_fork_cleanup_thread:
  	exit_thread(p);
fd0928df9   Jens Axboe   ioprio: move io p...
1724
  bad_fork_cleanup_io:
b69f22920   Louis Rilling   block: Fix io_con...
1725
1726
  	if (p->io_context)
  		exit_io_context(p);
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1727
  bad_fork_cleanup_namespaces:
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1728
  	exit_task_namespaces(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1729
  bad_fork_cleanup_mm:
c9f01245b   David Rientjes   oom: remove oom_d...
1730
  	if (p->mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1731
1732
  		mmput(p->mm);
  bad_fork_cleanup_signal:
4ab6c0833   Oleg Nesterov   clone(): fix race...
1733
  	if (!(clone_flags & CLONE_THREAD))
1c5354de9   Mike Galbraith   sched: Move sched...
1734
  		free_signal_struct(p->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1735
  bad_fork_cleanup_sighand:
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1736
  	__cleanup_sighand(p->sighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1737
1738
1739
1740
1741
1742
1743
1744
  bad_fork_cleanup_fs:
  	exit_fs(p); /* blocking */
  bad_fork_cleanup_files:
  	exit_files(p); /* blocking */
  bad_fork_cleanup_semundo:
  	exit_sem(p);
  bad_fork_cleanup_audit:
  	audit_free(p);
6c72e3501   Peter Zijlstra   perf: fix perf bu...
1745
  bad_fork_cleanup_perf:
cdd6c482c   Ingo Molnar   perf: Do the big ...
1746
  	perf_event_free_task(p);
6c72e3501   Peter Zijlstra   perf: fix perf bu...
1747
  bad_fork_cleanup_policy:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1748
  #ifdef CONFIG_NUMA
f0be3d32b   Lee Schermerhorn   mempolicy: rename...
1749
  	mpol_put(p->mempolicy);
e8604cb43   Li Zefan   cgroup: fix spuri...
1750
  bad_fork_cleanup_threadgroup_lock:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1751
  #endif
35df17c57   Shailabh Nagar   [PATCH] task dela...
1752
  	delayacct_tsk_free(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1753
  bad_fork_cleanup_count:
d84f4f992   David Howells   CRED: Inaugurate ...
1754
  	atomic_dec(&p->cred->user->processes);
e0e817392   David Howells   CRED: Add some co...
1755
  	exit_creds(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1756
  bad_fork_free:
405c07597   Andy Lutomirski   fork: Add task st...
1757
  	p->state = TASK_DEAD;
68f24b08e   Andy Lutomirski   sched/core: Free ...
1758
  	put_task_stack(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1759
  	free_task(p);
fe7d37d1f   Oleg Nesterov   [PATCH] copy_proc...
1760
1761
  fork_out:
  	return ERR_PTR(retval);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1762
  }
f106eee10   Oleg Nesterov   pids: fix fork_id...
1763
1764
1765
1766
1767
1768
1769
1770
1771
  static inline void init_idle_pids(struct pid_link *links)
  {
  	enum pid_type type;
  
  	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
  		INIT_HLIST_NODE(&links[type].node); /* not really needed */
  		links[type].pid = &init_struct_pid;
  	}
  }
0db0628d9   Paul Gortmaker   kernel: delete __...
1772
  /*
   * Create a task for @cpu through copy_process() and set it up with
   * init_idle().
   *
   * copy_process() is called with CLONE_VM, init_struct_pid as the pid and
   * cpu_to_node(cpu) as its last argument.  On success every pid link of the
   * new task is pointed at init_struct_pid by init_idle_pids() and the task
   * is handed to init_idle().  On failure the ERR_PTR() value produced by
   * copy_process() is returned unchanged, so callers have to check the
   * result with IS_ERR().
   */
  struct task_struct *fork_idle(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1773
  {
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1774
  	struct task_struct *task;
725fc629f   Andi Kleen   kernek/fork.c: al...
1775
1776
  	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
  			    cpu_to_node(cpu));
f106eee10   Oleg Nesterov   pids: fix fork_id...
1777
1778
  	if (!IS_ERR(task)) {
  		init_idle_pids(task->pids);
753ca4f31   Akinobu Mita   [PATCH] fix copy_...
1779
  		init_idle(task, cpu);
f106eee10   Oleg Nesterov   pids: fix fork_id...
1780
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1781

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1782
1783
  	return task;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1784
1785
1786
1787
1788
1789
  /*
   *  Ok, this is the main fork-routine.
   *
   * It copies the process, and if successful kick-starts
   * it and waits for it to finish using the VM if required.
   *
   * @clone_flags, @stack_start, @stack_size, @child_tidptr and @tls are
   * passed through to copy_process().  @parent_tidptr is only used here:
   * when CLONE_PARENT_SETTID is set the new pid number is stored there with
   * put_user(), whose result is not checked.
   *
   * Returns the pid_vnr() value of the new task on success, or the error
   * carried by the ERR_PTR() that copy_process() returned.
   */
3033f14ab   Josh Triplett   clone: support pa...
1790
  long _do_fork(unsigned long clone_flags,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1791
  	      unsigned long stack_start,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1792
1793
  	      unsigned long stack_size,
  	      int __user *parent_tidptr,
3033f14ab   Josh Triplett   clone: support pa...
1794
1795
  	      int __user *child_tidptr,
  	      unsigned long tls)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1796
1797
1798
  {
  	struct task_struct *p;
  	int trace = 0;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1799
  	long nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1800

bdff746a3   Andrew Morton   clone: prepare to...
1801
  	/*
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1802
1803
1804
1805
  	 * Determine whether and which event to report to ptracer.  When
  	 * called from kernel_thread or CLONE_UNTRACED is explicitly
  	 * requested, no event is reported; otherwise, report if the event
  	 * for the type of forking is enabled.
09a05394f   Roland McGrath   tracehook: clone
1806
  	 */
e80d6661c   Al Viro   flagday: kill pt_...
1807
  	if (!(clone_flags & CLONE_UNTRACED)) {
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
  		if (clone_flags & CLONE_VFORK)
  			trace = PTRACE_EVENT_VFORK;
  		else if ((clone_flags & CSIGNAL) != SIGCHLD)
  			trace = PTRACE_EVENT_CLONE;
  		else
  			trace = PTRACE_EVENT_FORK;
  
  		/* Fall back to no event when the chosen one is not enabled. */
  		if (likely(!ptrace_event_enabled(current, trace)))
  			trace = 0;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1818

62e791c1b   Al Viro   don't pass regs t...
1819
  	p = copy_process(clone_flags, stack_start, stack_size,
725fc629f   Andi Kleen   kernek/fork.c: al...
1820
  			 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
38addce8b   Emese Revfy   gcc-plugins: Add ...
1821
  	add_latent_entropy();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1822
1823
1824
1825
1826
1827
  	/*
  	 * Do this prior to waking up the new thread - the thread pointer
  	 * might get invalid after that point, if the thread exits quickly.
  	 */
  	if (!IS_ERR(p)) {
  		struct completion vfork;
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1828
  		struct pid *pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1829

0a16b6075   Mathieu Desnoyers   tracing, sched: L...
1830
  		trace_sched_process_fork(current, p);
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1831
1832
  		/* Reference on the pid; dropped by put_pid() at the end of this branch. */
  		pid = get_task_pid(p, PIDTYPE_PID);
  		nr = pid_vnr(pid);
30e49c263   Pavel Emelyanov   pid namespaces: a...
1833
1834
1835
  
  		if (clone_flags & CLONE_PARENT_SETTID)
  			put_user(nr, parent_tidptr);
a6f5e0637   Sukadev Bhattiprolu   pid namespaces: m...
1836

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1837
1838
1839
  		/*
  		 * For vfork the on-stack completion is published through
  		 * p->vfork_done before the child is woken; get_task_struct()
  		 * is called on the child before the wakeup as well.
  		 */
  		if (clone_flags & CLONE_VFORK) {
  			p->vfork_done = &vfork;
  			init_completion(&vfork);
d68b46fe1   Oleg Nesterov   vfork: make it ki...
1840
  			get_task_struct(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1841
  		}
3e51e3edf   Samir Bellabes   sched: Remove unu...
1842
  		wake_up_new_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1843

4b9d33e6d   Tejun Heo   ptrace: kill clon...
1844
1845
  		/* forking complete and child started to run, tell ptracer */
  		if (unlikely(trace))
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1846
  			ptrace_event_pid(trace, pid);
09a05394f   Roland McGrath   tracehook: clone
1847

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1848
  		/* VFORK_DONE is reported only when wait_for_vfork_done() returns 0. */
  		if (clone_flags & CLONE_VFORK) {
d68b46fe1   Oleg Nesterov   vfork: make it ki...
1849
  			if (!wait_for_vfork_done(p, &vfork))
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1850
  				ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1851
  		}
4e52365f2   Matthew Dempsky   ptrace: fix fork ...
1852
1853
  
  		put_pid(pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1854
  	} else {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1855
  		/* copy_process() failed: hand its error code back to the caller. */
  		nr = PTR_ERR(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1856
  	}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1857
  	return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1858
  }
3033f14ab   Josh Triplett   clone: support pa...
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
  #ifndef CONFIG_HAVE_COPY_THREAD_TLS
  /*
   * Legacy entry point kept for architectures that call do_fork directly
   * rather than using the syscall entry points below.  It has no tls
   * parameter, so everything is forwarded to _do_fork() with a tls of 0.
   */
  long do_fork(unsigned long clone_flags,
  	      unsigned long stack_start,
  	      unsigned long stack_size,
  	      int __user *parent_tidptr,
  	      int __user *child_tidptr)
  {
  	long ret;
  
  	ret = _do_fork(clone_flags, stack_start, stack_size,
  		       parent_tidptr, child_tidptr, 0);
  	return ret;
  }
  #endif
2aa3a7f86   Al Viro   preparation for g...
1872
1873
1874
1875
1876
  /*
   * Create a kernel thread.
   *
   * Thin wrapper around _do_fork(): CLONE_VM and CLONE_UNTRACED are always
   * added to @flags, and @fn and @arg are cast to unsigned long and passed
   * in the stack_start and stack_size argument slots.  Both tid pointers are
   * NULL and tls is 0.  Returns whatever _do_fork() returns.
   */
  pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
  {
3033f14ab   Josh Triplett   clone: support pa...
1877
1878
  	return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
  		(unsigned long)arg, NULL, NULL, 0);
2aa3a7f86   Al Viro   preparation for g...
1879
  }
2aa3a7f86   Al Viro   preparation for g...
1880

d2125043a   Al Viro   generic sys_fork ...
1881
1882
1883
1884
  #ifdef __ARCH_WANT_SYS_FORK
  /*
   * fork system call, only built when __ARCH_WANT_SYS_FORK is defined.
   * With CONFIG_MMU it calls _do_fork() with SIGCHLD as the only flag and
   * every other argument zero or NULL; without CONFIG_MMU it always returns
   * -EINVAL.
   */
  SYSCALL_DEFINE0(fork)
  {
  #ifdef CONFIG_MMU
3033f14ab   Josh Triplett   clone: support pa...
1885
  	return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
d2125043a   Al Viro   generic sys_fork ...
1886
1887
  #else
  	/* cannot be supported in nommu mode */
5d59e1827   Daeseok Youn   kernel/fork.c: fi...
1888
  	return -EINVAL;
d2125043a   Al Viro   generic sys_fork ...
1889
1890
1891
1892
1893
1894
1895
  #endif
  }
  #endif
  
  #ifdef __ARCH_WANT_SYS_VFORK
  /*
   * vfork system call, only built when __ARCH_WANT_SYS_VFORK is defined.
   * Calls _do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD and every other
   * argument zero or NULL.
   */
  SYSCALL_DEFINE0(vfork)
  {
3033f14ab   Josh Triplett   clone: support pa...
1896
1897
  	return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
  			0, NULL, NULL, 0);
d2125043a   Al Viro   generic sys_fork ...
1898
1899
1900
1901
1902
1903
1904
  }
  #endif
  
  #ifdef __ARCH_WANT_SYS_CLONE
  /*
   * clone system call, only built when __ARCH_WANT_SYS_CLONE is defined.
   *
   * The argument order of the prototype depends on the configuration:
   *  - CONFIG_CLONE_BACKWARDS:  tls comes before child_tidptr
   *  - CONFIG_CLONE_BACKWARDS2: newsp comes before clone_flags
   *  - CONFIG_CLONE_BACKWARDS3: an extra int stack_size follows newsp
   *  - otherwise: clone_flags, newsp, parent_tidptr, child_tidptr, tls
   *
   * All variants share one body, which forwards to _do_fork() with a stack
   * size of 0.  The stack_size argument of the BACKWARDS3 variant is not
   * passed on.
   */
  #ifdef CONFIG_CLONE_BACKWARDS
  SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
  		 int __user *, parent_tidptr,
3033f14ab   Josh Triplett   clone: support pa...
1905
  		 unsigned long, tls,
d2125043a   Al Viro   generic sys_fork ...
1906
1907
1908
1909
1910
  		 int __user *, child_tidptr)
  #elif defined(CONFIG_CLONE_BACKWARDS2)
  SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
  		 int __user *, parent_tidptr,
  		 int __user *, child_tidptr,
3033f14ab   Josh Triplett   clone: support pa...
1911
  		 unsigned long, tls)
dfa9771a7   Michal Simek   microblaze: fix c...
1912
1913
1914
1915
1916
  #elif defined(CONFIG_CLONE_BACKWARDS3)
  SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
  		int, stack_size,
  		int __user *, parent_tidptr,
  		int __user *, child_tidptr,
3033f14ab   Josh Triplett   clone: support pa...
1917
  		unsigned long, tls)
d2125043a   Al Viro   generic sys_fork ...
1918
1919
1920
1921
  #else
  SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
  		 int __user *, parent_tidptr,
  		 int __user *, child_tidptr,
3033f14ab   Josh Triplett   clone: support pa...
1922
  		 unsigned long, tls)
d2125043a   Al Viro   generic sys_fork ...
1923
1924
  #endif
  {
3033f14ab   Josh Triplett   clone: support pa...
1925
  	return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
d2125043a   Al Viro   generic sys_fork ...
1926
1927
  }
  #endif
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
1928
1929
1930
  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
  #define ARCH_MIN_MMSTRUCT_ALIGN 0
  #endif
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
1931
  /*
   * Constructor passed to kmem_cache_create() for the sighand cache in
   * proc_caches_init().  @data is treated as a struct sighand_struct; only
   * its siglock spinlock and its signalfd_wqh wait queue head are
   * initialized here.
   */
  static void sighand_ctor(void *data)
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1932
1933
  {
  	struct sighand_struct *sighand = data;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
1934
  	spin_lock_init(&sighand->siglock);
b8fceee17   Davide Libenzi   signalfd simplifi...
1935
  	init_waitqueue_head(&sighand->signalfd_wqh);
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1936
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1937
1938
1939
1940
  /*
   * Create the caches used for sighand, signal, files, fs, mm and vm_area
   * structures, then call mmap_init() and nsproxy_cache_init().
   *
   * None of the returned cache pointers is checked here; every cache is
   * created with SLAB_PANIC (NOTE(review): presumably that flag makes a
   * failed creation fatal - confirm against the slab documentation).
   */
  void __init proc_caches_init(void)
  {
  	/* The sighand cache is the only one here created with a constructor. */
  	sighand_cachep = kmem_cache_create("sighand_cache",
  			sizeof(struct sighand_struct), 0,
2dff44052   Vegard Nossum   kmemcheck: add mm...
1941
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
5d097056c   Vladimir Davydov   kmemcg: account c...
1942
  			SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1943
1944
  	signal_cachep = kmem_cache_create("signal_cache",
  			sizeof(struct signal_struct), 0,
5d097056c   Vladimir Davydov   kmemcg: account c...
1945
1946
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
  			NULL);
20c2df83d   Paul Mundt   mm: Remove slab d...
1947
  	files_cachep = kmem_cache_create("files_cache",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1948
  			sizeof(struct files_struct), 0,
5d097056c   Vladimir Davydov   kmemcg: account c...
1949
1950
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
  			NULL);
20c2df83d   Paul Mundt   mm: Remove slab d...
1951
  	fs_cachep = kmem_cache_create("fs_cache",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1952
  			sizeof(struct fs_struct), 0,
5d097056c   Vladimir Davydov   kmemcg: account c...
1953
1954
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
  			NULL);
6345d24da   Linus Torvalds   mm: Fix boot cras...
1955
1956
1957
1958
1959
1960
1961
  	/*
  	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
  	 * whole struct cpumask for the OFFSTACK case. We could change
  	 * this to *only* allocate as much of it as required by the
  	 * maximum number of CPU's we can ever have.  The cpumask_allocation
  	 * is at the end of the structure, exactly for that reason.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1962
  	mm_cachep = kmem_cache_create("mm_struct",
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
1963
  			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
5d097056c   Vladimir Davydov   kmemcg: account c...
1964
1965
1966
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
  			NULL);
  	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
8feae1311   David Howells   NOMMU: Make VMAs ...
1967
  	mmap_init();
665771939   Al Viro   make sure that ns...
1968
  	nsproxy_cache_init();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1969
  }
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1970

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1971
  /*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1972
   * Check constraints on flags passed to the unshare system call.
   *
   * Returns -EINVAL when @unshare_flags contains a bit outside the accepted
   * CLONE_* set, or when one of the sharing checks on the current task
   * below fails; returns 0 otherwise.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1973
   */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1974
  static int check_unshare_flags(unsigned long unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1975
  {
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1976
1977
  	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
  				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
50804fe37   Eric W. Biederman   pidns: Support un...
1978
  				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
a79a908fd   Aditya Kali   cgroup: introduce...
1979
  				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1980
  		return -EINVAL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1981
  	/*
12c641ab8   Eric W. Biederman   unshare: Unsharin...
1982
1983
1984
1985
  	 * Not implemented, but pretend it works if there is nothing
  	 * to unshare.  Note that unsharing the address space or the
  	 * signal handlers also need to unshare the signal queues (aka
  	 * CLONE_THREAD).
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1986
  	 */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1987
  	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
12c641ab8   Eric W. Biederman   unshare: Unsharin...
1988
1989
1990
1991
1992
1993
1994
1995
1996
  		if (!thread_group_empty(current))
  			return -EINVAL;
  	}
  	/* Rejected when the sighand structure has more than one user. */
  	if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
  		if (atomic_read(&current->sighand->count) > 1)
  			return -EINVAL;
  	}
  	if (unshare_flags & CLONE_VM) {
  		if (!current_is_single_threaded())
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1997
1998
  			return -EINVAL;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1999
2000
2001
2002
2003
  
  	return 0;
  }
  
  /*
99d1419d9   JANAK DESAI   [PATCH] unshare s...
2004
   * Unshare the filesystem structure if it is being shared
   *
   * Nothing is done (and 0 is returned with *new_fsp untouched) when
   * CLONE_FS is not set in @unshare_flags, when current->fs is NULL, or
   * when fs->users is 1.  Otherwise *new_fsp receives the result of
   * copy_fs_struct(); a NULL result is reported as -ENOMEM.  The copy is
   * not installed in the task here.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2005
2006
2007
2008
   */
  static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  {
  	struct fs_struct *fs = current->fs;
498052bba   Al Viro   New locking/refco...
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
  	if (!(unshare_flags & CLONE_FS) || !fs)
  		return 0;
  
  	/* don't need lock here; in the worst case we'll do useless copy */
  	if (fs->users == 1)
  		return 0;
  
  	*new_fsp = copy_fs_struct(fs);
  	if (!*new_fsp)
  		return -ENOMEM;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2019
2020
2021
2022
2023
  
  	return 0;
  }
  
  /*
a016f3389   JANAK DESAI   [PATCH] unshare s...
2024
   * Unshare file descriptor table if it is being shared
   *
   * A copy is made with dup_fd() only when CLONE_FILES is set in
   * @unshare_flags, current->files is not NULL and its count is above 1.
   * In that case *new_fdp receives the copy; if dup_fd() returns NULL the
   * error value it stored is returned.  In every other case 0 is returned
   * and *new_fdp is left untouched.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2025
2026
2027
2028
   */
  static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  {
  	struct files_struct *fd = current->files;
a016f3389   JANAK DESAI   [PATCH] unshare s...
2029
  	int error = 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2030
2031
  
  	if ((unshare_flags & CLONE_FILES) &&
a016f3389   JANAK DESAI   [PATCH] unshare s...
2032
2033
2034
2035
2036
  	    (fd && atomic_read(&fd->count) > 1)) {
  		*new_fdp = dup_fd(fd, &error);
  		if (!*new_fdp)
  			return error;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2037
2038
2039
2040
2041
  
  	return 0;
  }
  
  /*
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2042
2043
2044
2045
2046
2047
2048
   * unshare allows a process to 'unshare' part of the process
   * context which was originally shared using clone.  copy_*
   * functions used by do_fork() cannot be used here directly
   * because they modify an inactive task_struct that is being
   * constructed. Here we are modifying the current, active,
   * task_struct.
   *
   * The function works in three phases: widen @unshare_flags with the
   * flags each request implies and validate them, build the replacement
   * objects (fs, files, cred, nsproxy), then install them in current.
   * The cleanup labels at the end are also reached on success, where the
   * new_* variables hold either NULL or the displaced old object.
   *
   * Returns 0 on success or the error from the step that failed.
   */
6559eed8c   Heiko Carstens   [CVE-2009-0029] S...
2049
  SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2050
  {
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2051
  	struct fs_struct *fs, *new_fs = NULL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2052
  	struct files_struct *fd, *new_fd = NULL;
b2e0d9870   Eric W. Biederman   userns: Implement...
2053
  	struct cred *new_cred = NULL;
cf7b708c8   Pavel Emelyanov   Make access to ta...
2054
  	struct nsproxy *new_nsproxy = NULL;
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2055
  	int do_sysvsem = 0;
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2056
  	int err;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2057

50804fe37   Eric W. Biederman   pidns: Support un...
2058
  	/*
faf00da54   Eric W. Biederman   userns,pidns: For...
2059
2060
  	 * If unsharing a user namespace, must also unshare the thread group
  	 * and unshare the filesystem root and working directories.
b2e0d9870   Eric W. Biederman   userns: Implement...
2061
2062
  	 */
  	if (unshare_flags & CLONE_NEWUSER)
e66eded83   Eric W. Biederman   userns: Don't all...
2063
  		unshare_flags |= CLONE_THREAD | CLONE_FS;
b2e0d9870   Eric W. Biederman   userns: Implement...
2064
  	/*
50804fe37   Eric W. Biederman   pidns: Support un...
2065
2066
2067
2068
  	 * If unsharing vm, must also unshare signal handlers.
  	 */
  	if (unshare_flags & CLONE_VM)
  		unshare_flags |= CLONE_SIGHAND;
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
2069
  	/*
12c641ab8   Eric W. Biederman   unshare: Unsharin...
2070
2071
2072
2073
2074
  	 * If unsharing signal handlers, must also unshare the signal queues.
  	 */
  	if (unshare_flags & CLONE_SIGHAND)
  		unshare_flags |= CLONE_THREAD;
  	/*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2075
2076
2077
2078
  	 * If unsharing namespace, must also unshare filesystem information.
  	 */
  	if (unshare_flags & CLONE_NEWNS)
  		unshare_flags |= CLONE_FS;
50804fe37   Eric W. Biederman   pidns: Support un...
2079
2080
2081
2082
  
  	/* Validation runs on the widened flag set computed above. */
  	err = check_unshare_flags(unshare_flags);
  	if (err)
  		goto bad_unshare_out;
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2083
  	/*
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
2084
2085
2086
2087
2088
  	 * CLONE_NEWIPC must also detach from the undolist: after switching
  	 * to a new ipc namespace, the semaphore arrays from the old
  	 * namespace are unreachable.
  	 */
  	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2089
  		do_sysvsem = 1;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
2090
2091
  	/*
  	 * Build phase: each failure jumps to the label that releases only
  	 * what the earlier steps created.
  	 */
  	err = unshare_fs(unshare_flags, &new_fs);
  	if (err)
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2092
  		goto bad_unshare_out;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
2093
2094
  	err = unshare_fd(unshare_flags, &new_fd);
  	if (err)
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
2095
  		goto bad_unshare_cleanup_fs;
b2e0d9870   Eric W. Biederman   userns: Implement...
2096
  	err = unshare_userns(unshare_flags, &new_cred);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
2097
  	if (err)
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2098
  		goto bad_unshare_cleanup_fd;
b2e0d9870   Eric W. Biederman   userns: Implement...
2099
2100
2101
2102
  	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
  					 new_cred, new_fs);
  	if (err)
  		goto bad_unshare_cleanup_cred;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
2103

b2e0d9870   Eric W. Biederman   userns: Implement...
2104
  	/* Install phase: skipped entirely when nothing has to change. */
  	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
2105
2106
2107
2108
2109
2110
  		if (do_sysvsem) {
  			/*
  			 * CLONE_SYSVSEM is equivalent to sys_exit().
  			 */
  			exit_sem(current);
  		}
ab602f799   Jack Miller   shm: make exit_sh...
2111
2112
2113
2114
2115
  		if (unshare_flags & CLONE_NEWIPC) {
  			/* Orphan segments in old ns (see sem above). */
  			exit_shm(current);
  			shm_init_task(current);
  		}
ab516013a   Serge E. Hallyn   [PATCH] namespace...
2116

6f977e6b2   Alan Cox   fork: unshare: re...
2117
  		if (new_nsproxy)
cf7b708c8   Pavel Emelyanov   Make access to ta...
2118
  			switch_task_namespaces(current, new_nsproxy);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2119

cf7b708c8   Pavel Emelyanov   Make access to ta...
2120
  		task_lock(current);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2121
2122
  		/*
  		 * Swap in the new fs.  Afterwards new_fs names the old
  		 * structure if its user count dropped to zero (so the
  		 * cleanup label frees it), or NULL if it still has users.
  		 */
  		if (new_fs) {
  			fs = current->fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
2123
  			spin_lock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2124
  			current->fs = new_fs;
498052bba   Al Viro   New locking/refco...
2125
2126
2127
2128
  			if (--fs->users)
  				new_fs = NULL;
  			else
  				new_fs = fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
2129
  			spin_unlock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2130
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2131
2132
2133
2134
2135
2136
2137
  		/*
  		 * Swap in the new files table; new_fd now names the old one,
  		 * which the cleanup label releases with put_files_struct().
  		 */
  		if (new_fd) {
  			fd = current->files;
  			current->files = new_fd;
  			new_fd = fd;
  		}
  
  		task_unlock(current);
b2e0d9870   Eric W. Biederman   userns: Implement...
2138
2139
2140
2141
2142
2143
  
  		if (new_cred) {
  			/* Install the new user namespace */
  			commit_creds(new_cred);
  			/* Cleared so that the cleanup label does not put it. */
  			new_cred = NULL;
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2144
  	}
b2e0d9870   Eric W. Biederman   userns: Implement...
2145
2146
2147
  	/* The labels below run on the success path as well as on errors. */
  bad_unshare_cleanup_cred:
  	if (new_cred)
  		put_cred(new_cred);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2148
2149
2150
  bad_unshare_cleanup_fd:
  	if (new_fd)
  		put_files_struct(new_fd);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2151
2152
  bad_unshare_cleanup_fs:
  	if (new_fs)
498052bba   Al Viro   New locking/refco...
2153
  		free_fs_struct(new_fs);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2154

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
2155
2156
2157
  bad_unshare_out:
  	return err;
  }
3b1253880   Al Viro   [PATCH] sanitize ...
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
  
  /*
   *	Helper to unshare the files of the current task.
   *	We don't want to expose copy_files internals to
   *	the exec layer of the kernel.
   *
   *	unshare_fd() is asked for a private copy of the files table.  If
   *	that fails, or no copy was made, *displaced is set to NULL and the
   *	unshare_fd() result is returned (0 when nothing had to be copied).
   *	Otherwise *displaced receives the previous table, the copy is
   *	installed under task_lock() and 0 is returned.  The previous table
   *	is not released here.
   */
  
  int unshare_files(struct files_struct **displaced)
  {
  	struct task_struct *task = current;
50704516f   Al Viro   Fix uninitialized...
2168
  	struct files_struct *copy = NULL;
3b1253880   Al Viro   [PATCH] sanitize ...
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
  	int error;
  
  	error = unshare_fd(CLONE_FILES, &copy);
  	if (error || !copy) {
  		*displaced = NULL;
  		return error;
  	}
  	*displaced = task->files;
  	task_lock(task);
  	task->files = copy;
  	task_unlock(task);
  	return 0;
  }
16db3d3f1   Heinrich Schuchardt   kernel/sysctl.c: ...
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
  
  /*
   * sysctl handler for the thread limit.
   *
   * Works on a stack copy of @table whose data pointer is redirected to a
   * local int seeded from max_threads and whose extra1/extra2 point at
   * MIN_THREADS and MAX_THREADS.  The copy is handed to
   * proc_dointvec_minmax(); a non-zero result from it is returned as is.
   * Only a successful write goes on to call set_max_threads() with the
   * parsed value.  Returns 0 otherwise.
   */
  int sysctl_max_threads(struct ctl_table *table, int write,
  		       void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int lower = MIN_THREADS;
  	int upper = MAX_THREADS;
  	int requested = max_threads;
  	struct ctl_table shadow = *table;
  	int rc;
  
  	shadow.data = &requested;
  	shadow.extra1 = &lower;
  	shadow.extra2 = &upper;
  
  	rc = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
  	if (rc)
  		return rc;
  
  	/* Reads stop here; only a successful write updates the limit. */
  	if (write)
  		set_max_threads(requested);
  
  	return 0;
  }