Blame view

kernel/fork.c 44.4 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/kernel/fork.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   *  'fork.c' contains the help-routines for the 'fork' system call
   * (see also entry.S and others).
   * Fork is rather simple, once you get the hang of it, but the memory
   * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/unistd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
18
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/completion.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
  #include <linux/personality.h>
  #include <linux/mempolicy.h>
  #include <linux/sem.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
23
  #include <linux/fdtable.h>
da9cbc873   Jens Axboe   block: blkdev.h c...
24
  #include <linux/iocontext.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/key.h>
  #include <linux/binfmts.h>
  #include <linux/mman.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
28
  #include <linux/mmu_notifier.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  #include <linux/fs.h>
ab516013a   Serge E. Hallyn   [PATCH] namespace...
30
  #include <linux/nsproxy.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
31
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  #include <linux/cpu.h>
b4f48b636   Paul Menage   Task Control Grou...
33
  #include <linux/cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
  #include <linux/security.h>
a1e78772d   Mel Gorman   hugetlb: reserve ...
35
  #include <linux/hugetlb.h>
e2cfabdfd   Will Drewry   seccomp: add syst...
36
  #include <linux/seccomp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
39
40
  #include <linux/swap.h>
  #include <linux/syscalls.h>
  #include <linux/jiffies.h>
  #include <linux/futex.h>
8141c7f3e   Linus Torvalds   Move "exit_robust...
41
  #include <linux/compat.h>
207205a2b   Eric Dumazet   kthread: NUMA awa...
42
  #include <linux/kthread.h>
7c3ab7381   Andrew Morton   [PATCH] io-accoun...
43
  #include <linux/task_io_accounting_ops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
44
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
  #include <linux/ptrace.h>
  #include <linux/mount.h>
  #include <linux/audit.h>
78fb74669   Pavel Emelianov   Memory controller...
48
  #include <linux/memcontrol.h>
f201ae235   Frederic Weisbecker   tracing/function-...
49
  #include <linux/ftrace.h>
5e2bf0142   Mike Galbraith   namespaces, pid_n...
50
  #include <linux/proc_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
51
52
  #include <linux/profile.h>
  #include <linux/rmap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
53
  #include <linux/ksm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
54
  #include <linux/acct.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
55
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
56
  #include <linux/cn_proc.h>
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
57
  #include <linux/freezer.h>
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
58
  #include <linux/delayacct.h>
ad4ecbcba   Shailabh Nagar   [PATCH] delay acc...
59
  #include <linux/taskstats_kern.h>
0a4254058   Arjan van de Ven   [PATCH] Add the c...
60
  #include <linux/random.h>
522ed7767   Miloslav Trmac   Audit: add TTY in...
61
  #include <linux/tty.h>
fd0928df9   Jens Axboe   ioprio: move io p...
62
  #include <linux/blkdev.h>
5ad4e53bd   Al Viro   Get rid of indire...
63
  #include <linux/fs_struct.h>
7c9f8861e   Eric Sandeen   stackprotector: u...
64
  #include <linux/magic.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
65
  #include <linux/perf_event.h>
42c4ab41a   Stanislaw Gruszka   itimers: Merge IT...
66
  #include <linux/posix-timers.h>
8e7cac798   Avi Kivity   core: Fix user re...
67
  #include <linux/user-return-notifier.h>
3d5992d2a   Ying Han   oom: add per-mm o...
68
  #include <linux/oom.h>
ba76149f4   Andrea Arcangeli   thp: khugepaged
69
  #include <linux/khugepaged.h>
d80e731ec   Oleg Nesterov   epoll: introduce ...
70
  #include <linux/signalfd.h>
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
71
  #include <linux/uprobes.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
72
73
74
75
76
77
78
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
ad8d75fff   Steven Rostedt   tracing/events: m...
79
  #include <trace/events/sched.h>
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
80
81
  #define CREATE_TRACE_POINTS
  #include <trace/events/task.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
83
84
85
  /*
   * Counters protected by write_lock_irq(&tasklist_lock)
   */
  unsigned long total_forks;	/* Handle normal Linux uptimes. */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
86
  int nr_threads;			/* The idle threads do not count.. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
89
90
  
  int max_threads;		/* tunable limit on nr_threads */
  
  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
c59923a15   Christoph Hellwig   [PATCH] remove th...
91
  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
92
93
94
95
96
97
98
99
  
  #ifdef CONFIG_PROVE_RCU
  /*
   * Report whether lockdep considers tasklist_lock to be held.
   * The result of lockdep_is_held(&tasklist_lock) is returned unchanged.
   */
  int lockdep_tasklist_lock_is_held(void)
  {
  	int held = lockdep_is_held(&tasklist_lock);

  	return held;
  }
  EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
  #endif /* #ifdef CONFIG_PROVE_RCU */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
100
101
102
103
104
  
  /*
   * Sum the per-CPU process_counts counters over every possible CPU and
   * return the total.
   */
  int nr_processes(void)
  {
  	int cpu;
  	int total = 0;
1d5107509   Ian Campbell   Correct nr_proces...
105
  	for_each_possible_cpu(cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
107
108
109
  		total += per_cpu(process_counts, cpu);
  
  	return total;
  }
f5e102873   Thomas Gleixner   task_allocator: U...
110
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
111
  static struct kmem_cache *task_struct_cachep;
41101809a   Thomas Gleixner   fork: Provide wea...
112
113
114
115
116
117
118
119
120
121
122
123
124
  
  /*
   * Allocate one task_struct from task_struct_cachep on NUMA node @node
   * with GFP_KERNEL.  Returns whatever kmem_cache_alloc_node() returns.
   */
  static inline struct task_struct *alloc_task_struct_node(int node)
  {
  	struct task_struct *tsk;

  	tsk = kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
  	return tsk;
  }
  
  /* Weak default: nothing to release for @tsk. */
  void __weak arch_release_task_struct(struct task_struct *tsk)
  {
  }
  
  /*
   * Release a task_struct: run the arch_release_task_struct() hook first,
   * then hand the object back to task_struct_cachep.
   */
  static inline void free_task_struct(struct task_struct *tsk)
  {
  	arch_release_task_struct(tsk);
  	kmem_cache_free(task_struct_cachep, tsk);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
  #endif
f5e102873   Thomas Gleixner   task_allocator: U...
126
  #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
41101809a   Thomas Gleixner   fork: Provide wea...
127
  /* Weak default: nothing to release for @ti. */
  void __weak arch_release_thread_info(struct thread_info *ti)
  {
  }
0d15d74a1   Thomas Gleixner   fork: Provide kme...
128
129
130
131
132
  /*
   * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
   * kmemcache based allocator.
   */
  # if THREAD_SIZE >= PAGE_SIZE
b6a84016b   Eric Dumazet   mm: NUMA aware al...
133
134
  /*
   * Page-allocator variant (THREAD_SIZE >= PAGE_SIZE): allocate
   * 2^THREAD_SIZE_ORDER pages on @node with THREADINFO_GFP and return their
   * kernel address, or NULL if the page allocation failed.  @tsk is unused.
   */
  static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
  						  int node)
b69c49b78   FUJITA Tomonori   clean up duplicat...
135
  {
2889f6081   Thomas Gleixner   fork: Move thread...
136
137
  	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
  					     THREAD_SIZE_ORDER);
b6a84016b   Eric Dumazet   mm: NUMA aware al...
138
139
  
  	return page ? page_address(page) : NULL;
b69c49b78   FUJITA Tomonori   clean up duplicat...
140
141
142
143
  }
  
  /*
   * Page-allocator variant: run the arch_release_thread_info() hook, then
   * free the 2^THREAD_SIZE_ORDER pages starting at @ti.
   */
  static inline void free_thread_info(struct thread_info *ti)
  {
41101809a   Thomas Gleixner   fork: Provide wea...
144
  	arch_release_thread_info(ti);
b69c49b78   FUJITA Tomonori   clean up duplicat...
145
146
  	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
  }
0d15d74a1   Thomas Gleixner   fork: Provide kme...
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
  # else
  static struct kmem_cache *thread_info_cache;
  
  /*
   * Slab variant (THREAD_SIZE < PAGE_SIZE): take one object from
   * thread_info_cache on @node using THREADINFO_GFP.  @tsk is unused.
   */
  static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
  						  int node)
  {
  	struct thread_info *ti;

  	ti = kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
  	return ti;
  }
  
  /*
   * Slab variant: run the arch_release_thread_info() hook, then return @ti
   * to thread_info_cache.
   */
  static void free_thread_info(struct thread_info *ti)
  {
  	arch_release_thread_info(ti);
  	kmem_cache_free(thread_info_cache, ti);
  }
  
  /*
   * Create the "thread_info" slab cache with object size and alignment both
   * equal to THREAD_SIZE.  A failed creation is fatal (BUG).
   */
  void thread_info_cache_init(void)
  {
  	struct kmem_cache *cache;

  	cache = kmem_cache_create("thread_info", THREAD_SIZE, THREAD_SIZE,
  				  0, NULL);
  	thread_info_cache = cache;
  	BUG_ON(!thread_info_cache);
  }
  # endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
169
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
170
  /* SLAB cache for signal_struct structures (tsk->signal) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
171
  static struct kmem_cache *signal_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172
173
  
  /* SLAB cache for sighand_struct structures (tsk->sighand) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
174
  struct kmem_cache *sighand_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
176
  
  /* SLAB cache for files_struct structures (tsk->files) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
177
  struct kmem_cache *files_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
179
  
  /* SLAB cache for fs_struct structures (tsk->fs) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
180
  struct kmem_cache *fs_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
182
  
  /* SLAB cache for vm_area_struct structures */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
183
  struct kmem_cache *vm_area_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
184
185
  
  /* SLAB cache for mm_struct structures (tsk->mm) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
186
  static struct kmem_cache *mm_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
187

c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
188
189
190
191
192
193
  /*
   * Adjust the NR_KERNEL_STACK counter of the zone that holds @ti's page
   * by @account (callers in this file pass +1 or -1).
   */
  static void account_kernel_stack(struct thread_info *ti, int account)
  {
  	struct page *stack_page = virt_to_page(ti);

  	mod_zone_page_state(page_zone(stack_page), NR_KERNEL_STACK, account);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194
195
  /*
   * Tear down the memory backing @tsk: un-account and free its kernel stack
   * (tsk->stack), run the rt-mutex debug, ftrace-graph and seccomp cleanup
   * hooks, and finally free the task_struct itself.
   */
  void free_task(struct task_struct *tsk)
  {
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
196
  	account_kernel_stack(tsk->stack, -1);
f7e4217b0   Roman Zippel   rename thread_inf...
197
  	free_thread_info(tsk->stack);
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
198
  	rt_mutex_debug_task_free(tsk);
fb52607af   Frederic Weisbecker   tracing/function-...
199
  	ftrace_graph_exit_task(tsk);
e2cfabdfd   Will Drewry   seccomp: add syst...
200
  	put_seccomp_filter(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
202
203
  	free_task_struct(tsk);
  }
  EXPORT_SYMBOL(free_task);
ea6d290ca   Oleg Nesterov   signals: make tas...
204
205
  /*
   * Free a signal_struct: run the taskstats and sched-autogroup cleanup
   * hooks, then return @sig to signal_cachep.
   */
  static inline void free_signal_struct(struct signal_struct *sig)
  {
97101eb41   Oleg Nesterov   exit: move taskst...
206
  	taskstats_tgid_free(sig);
1c5354de9   Mike Galbraith   sched: Move sched...
207
  	sched_autogroup_exit(sig);
ea6d290ca   Oleg Nesterov   signals: make tas...
208
209
210
211
212
  	kmem_cache_free(signal_cachep, sig);
  }
  
  /*
   * Drop one reference on @sig (sig->sigcnt); free the structure when the
   * count reaches zero.
   */
  static inline void put_signal_struct(struct signal_struct *sig)
  {
1c5354de9   Mike Galbraith   sched: Move sched...
213
  	if (atomic_dec_and_test(&sig->sigcnt))
ea6d290ca   Oleg Nesterov   signals: make tas...
214
215
  		free_signal_struct(sig);
  }
158d9ebd1   Andrew Morton   [PATCH] resurrect...
216
  /*
   * Final teardown of a task_struct.  Warns if the task has no exit_state,
   * still has a non-zero usage count, or is the current task; then releases
   * security state, credentials, delay accounting and the signal_struct
   * reference.  The task memory itself is freed here unless
   * profile_handoff_task() returns non-zero.
   */
  void __put_task_struct(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
217
  {
270f722d4   Eugene Teo   Fix tsk->exit_sta...
218
  	WARN_ON(!tsk->exit_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219
220
  	WARN_ON(atomic_read(&tsk->usage));
  	WARN_ON(tsk == current);
1a2a4d06e   Kees Cook   security: create ...
221
  	security_task_free(tsk);
e0e817392   David Howells   CRED: Add some co...
222
  	exit_creds(tsk);
35df17c57   Shailabh Nagar   [PATCH] task dela...
223
  	delayacct_tsk_free(tsk);
ea6d290ca   Oleg Nesterov   signals: make tas...
224
  	put_signal_struct(tsk->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
225
226
227
228
  
  	if (!profile_handoff_task(tsk))
  		free_task(tsk);
  }
77c100c83   Rik van Riel   export pid symbol...
229
  EXPORT_SYMBOL_GPL(__put_task_struct);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230

6c0a9fa62   Thomas Gleixner   fork: Remove the ...
231
  /* Weak default: no architecture-specific task caches to set up. */
  void __init __weak arch_task_cache_init(void)
  {
  }
61c4628b5   Suresh Siddha   x86, fpu: split F...
232

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
233
234
  /*
   * Boot-time setup for fork: create the task_struct slab cache (unless the
   * architecture provides its own allocator), call arch_task_cache_init(),
   * derive max_threads from @mempages, and seed init_task's RLIMIT_NPROC and
   * RLIMIT_SIGPENDING limits from it.
   *
   * @mempages: presumably the number of memory pages available - confirm
   *            against the caller.
   */
  void __init fork_init(unsigned long mempages)
  {
f5e102873   Thomas Gleixner   task_allocator: U...
235
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
238
239
240
241
  #ifndef ARCH_MIN_TASKALIGN
  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
  #endif
  	/* create a slab on which task_structs can be allocated */
  	task_struct_cachep =
  		kmem_cache_create("task_struct", sizeof(struct task_struct),
2dff44052   Vegard Nossum   kmemcheck: add mm...
242
  			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
243
  #endif
61c4628b5   Suresh Siddha   x86, fpu: split F...
244
245
  	/* do the arch specific task caches init */
  	arch_task_cache_init();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
247
248
249
250
251
252
253
254
255
  	/*
  	 * The default maximum number of threads is set to a safe
  	 * value: the thread structures can take up at most half
  	 * of memory.
  	 *
  	 * NOTE(review): if mempages is a page count, the divisor below
  	 * limits thread stacks to one eighth of memory, not half - confirm
  	 * which is intended.  Also, the divisor evaluates to 0 when
  	 * 8 * THREAD_SIZE < PAGE_SIZE - confirm no configuration hits that.
  	 */
  	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
  
  	/*
  	 * we need to allow at least 20 threads to boot a system
  	 */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
256
  	if (max_threads < 20)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
257
258
259
260
261
262
263
  		max_threads = 20;
  
  	/* init_task gets half of max_threads for both soft and hard NPROC */
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  	/* RLIMIT_SIGPENDING starts out as a copy of RLIMIT_NPROC */
  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
  		init_task.signal->rlim[RLIMIT_NPROC];
  }
61c4628b5   Suresh Siddha   x86, fpu: split F...
264
265
266
267
268
269
  /*
   * Weak default for duplicating a task_struct: plain structure copy of
   * @src into @dst.  Always returns 0.
   */
  int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
  					       struct task_struct *src)
  {
  	*dst = *src;
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270
271
272
273
  /*
   * Allocate a new task_struct plus thread_info (kernel stack) on the NUMA
   * node chosen by tsk_fork_get_node(@orig), copy @orig into it via
   * arch_dup_task_struct(), and reset the per-task fields that must not be
   * inherited.  Returns the new task with usage count 2, or NULL if either
   * allocation or the arch copy fails (everything allocated so far is freed).
   */
  static struct task_struct *dup_task_struct(struct task_struct *orig)
  {
  	struct task_struct *tsk;
  	struct thread_info *ti;
7c9f8861e   Eric Sandeen   stackprotector: u...
274
  	unsigned long *stackend;
207205a2b   Eric Dumazet   kthread: NUMA awa...
275
  	int node = tsk_fork_get_node(orig);
3e26c149c   Peter Zijlstra   mm: dirty balanci...
276
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277

504f52b54   Eric Dumazet   mm: NUMA aware al...
278
  	tsk = alloc_task_struct_node(node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
279
280
  	if (!tsk)
  		return NULL;
b6a84016b   Eric Dumazet   mm: NUMA aware al...
281
  	ti = alloc_thread_info_node(tsk, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
282
283
284
285
  	if (!ti) {
  		free_task_struct(tsk);
  		return NULL;
  	}
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
286
  	err = arch_dup_task_struct(tsk, orig);
61c4628b5   Suresh Siddha   x86, fpu: split F...
287
288
  	if (err)
  		goto out;
f7e4217b0   Roman Zippel   rename thread_inf...
289
  	/* the copy above left tsk->stack equal to orig's; point it at ti */
  	tsk->stack = ti;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
290

10ebffde3   Al Viro   [PATCH] m68k: int...
291
  	setup_thread_stack(tsk, orig);
8e7cac798   Avi Kivity   core: Fix user re...
292
  	clear_user_return_notifier(tsk);
f26f9aff6   Mike Galbraith   Sched: fix skip_c...
293
  	clear_tsk_need_resched(tsk);
7c9f8861e   Eric Sandeen   stackprotector: u...
294
295
  	stackend = end_of_stack(tsk);
  	*stackend = STACK_END_MAGIC;	/* for overflow detection */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296

0a4254058   Arjan van de Ven   [PATCH] Add the c...
297
298
299
  #ifdef CONFIG_CC_STACKPROTECTOR
  	tsk->stack_canary = get_random_int();
  #endif
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
300
301
302
303
304
  	/*
  	 * One for us, one for whoever does the "release_task()" (usually
  	 * parent)
  	 */
  	atomic_set(&tsk->usage, 2);
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
305
  #ifdef CONFIG_BLK_DEV_IO_TRACE
2056a782f   Jens Axboe   [PATCH] Block que...
306
  	tsk->btrace_seq = 0;
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
307
  #endif
a0aa7f68a   Jens Axboe   [PATCH] Don't inh...
308
  	tsk->splice_pipe = NULL;
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
309
310
  
  	account_kernel_stack(ti, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
311
  	return tsk;
61c4628b5   Suresh Siddha   x86, fpu: split F...
312
313
314
315
316
  
  	/* error path: arch_dup_task_struct() failed after both allocations */
  out:
  	free_thread_info(ti);
  	free_task_struct(tsk);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
317
318
319
  }
  
  #ifdef CONFIG_MMU
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
320
  /*
   * Copy the address-space layout of @oldmm into the freshly created @mm.
   *
   * Both mmap_sem semaphores are held for writing for the whole operation.
   * Every vma of @oldmm that is not VM_DONTCOPY is duplicated (including its
   * memory policy and anon_vma linkage), linked into @mm's vma list and
   * rbtree, registered with its file mapping if it has one, and has its page
   * table entries copied with copy_page_range().
   *
   * Returns 0 on success or a negative errno; allocation and accounting
   * failures inside the loop are reported as -ENOMEM.
   */
  static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
  {
297c5eee3   Linus Torvalds   mm: make the vma ...
322
  	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
323
324
325
326
327
328
  	struct rb_node **rb_link, *rb_parent;
  	int retval;
  	unsigned long charge;
  	struct mempolicy *pol;
  
  	down_write(&oldmm->mmap_sem);
ec8c0446b   Ralf Baechle   [PATCH] Optimize ...
329
  	flush_cache_dup_mm(oldmm);
ad3394517   Ingo Molnar   [PATCH] lockdep: ...
330
331
332
333
  	/*
  	 * Not linked in yet - no deadlock potential:
  	 */
  	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
334

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
335
336
337
338
  	/* start the child with an empty vma list, rbtree and caches */
  	mm->locked_vm = 0;
  	mm->mmap = NULL;
  	mm->mmap_cache = NULL;
  	mm->free_area_cache = oldmm->mmap_base;
1363c3cd8   Wolfgang Wander   [PATCH] Avoiding ...
339
  	mm->cached_hole_size = ~0UL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
  	mm->map_count = 0;
948942445   Rusty Russell   cpumask: use mm_c...
341
  	cpumask_clear(mm_cpumask(mm));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
342
343
344
345
  	mm->mm_rb = RB_ROOT;
  	rb_link = &mm->mm_rb.rb_node;
  	rb_parent = NULL;
  	pprev = &mm->mmap;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
346
347
348
  	retval = ksm_fork(mm, oldmm);
  	if (retval)
  		goto out;
ba76149f4   Andrea Arcangeli   thp: khugepaged
349
350
351
  	retval = khugepaged_fork(mm, oldmm);
  	if (retval)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352

297c5eee3   Linus Torvalds   mm: make the vma ...
353
  	prev = NULL;
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
354
  	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
355
356
357
  		struct file *file;
  
  		/* skipped vmas are subtracted from the child's accounting */
  		if (mpnt->vm_flags & VM_DONTCOPY) {
3b6bfcdb1   Hugh Dickins   [PATCH] lower VM_...
358
359
  			long pages = vma_pages(mpnt);
  			mm->total_vm -= pages;
ab50b8ed8   Hugh Dickins   [PATCH] mm: vm_st...
360
  			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
3b6bfcdb1   Hugh Dickins   [PATCH] lower VM_...
361
  								-pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
362
363
364
365
  			continue;
  		}
  		/* charge is what fail_nomem must un-account for this vma */
  		charge = 0;
  		if (mpnt->vm_flags & VM_ACCOUNT) {
7edc8b0ac   Siddhesh Poyarekar   mm/fork: fix over...
366
367
  			unsigned long len;
  			len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
191c54244   Al Viro   mm: collapse secu...
368
  			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
369
370
371
  				goto fail_nomem;
  			charge = len;
  		}
e94b17660   Christoph Lameter   [PATCH] slab: rem...
372
  		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
373
374
375
  		if (!tmp)
  			goto fail_nomem;
  		*tmp = *mpnt;
5beb49305   Rik van Riel   mm: change anon_v...
376
  		INIT_LIST_HEAD(&tmp->anon_vma_chain);
846a16bf0   Lee Schermerhorn   mempolicy: rename...
377
  		pol = mpol_dup(vma_policy(mpnt));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
378
379
380
381
  		/* this value is overwritten with -ENOMEM on the failure path */
  		retval = PTR_ERR(pol);
  		if (IS_ERR(pol))
  			goto fail_nomem_policy;
  		vma_set_policy(tmp, pol);
a247c3a97   Andrea Arcangeli   rmap: fix walk du...
382
  		tmp->vm_mm = mm;
5beb49305   Rik van Riel   mm: change anon_v...
383
384
  		if (anon_vma_fork(tmp, mpnt))
  			goto fail_nomem_anon_vma_fork;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
385
  		tmp->vm_flags &= ~VM_LOCKED;
297c5eee3   Linus Torvalds   mm: make the vma ...
386
  		tmp->vm_next = tmp->vm_prev = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
387
388
  		file = tmp->vm_file;
  		if (file) {
f3a43f3f6   Josef "Jeff" Sipek   [PATCH] kernel: c...
389
  			struct inode *inode = file->f_path.dentry->d_inode;
b88ed2059   Hugh Dickins   fix mapping_writa...
390
  			struct address_space *mapping = file->f_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
391
392
393
  			get_file(file);
  			if (tmp->vm_flags & VM_DENYWRITE)
  				atomic_dec(&inode->i_writecount);
3d48ae45e   Peter Zijlstra   mm: Convert i_mma...
394
  			mutex_lock(&mapping->i_mmap_mutex);
b88ed2059   Hugh Dickins   fix mapping_writa...
395
396
  			if (tmp->vm_flags & VM_SHARED)
  				mapping->i_mmap_writable++;
b88ed2059   Hugh Dickins   fix mapping_writa...
397
398
  			flush_dcache_mmap_lock(mapping);
  			/* insert tmp into the share list, just after mpnt */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
399
  			vma_prio_tree_add(tmp, mpnt);
b88ed2059   Hugh Dickins   fix mapping_writa...
400
  			flush_dcache_mmap_unlock(mapping);
3d48ae45e   Peter Zijlstra   mm: Convert i_mma...
401
  			mutex_unlock(&mapping->i_mmap_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
402
403
404
  		}
  
  		/*
a1e78772d   Mel Gorman   hugetlb: reserve ...
405
406
407
408
409
410
411
412
  		 * Clear hugetlb-related page reserves for children. This only
  		 * affects MAP_PRIVATE mappings. Faults generated by the child
  		 * are not guaranteed to succeed, even if read-only
  		 */
  		if (is_vm_hugetlb_page(tmp))
  			reset_vma_resv_huge_pages(tmp);
  
  		/*
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
413
  		 * Link in the new vma and copy the page table entries.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
414
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
416
  		*pprev = tmp;
  		pprev = &tmp->vm_next;
297c5eee3   Linus Torvalds   mm: make the vma ...
417
418
  		tmp->vm_prev = prev;
  		prev = tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
419
420
421
422
423
424
  
  		/* each new vma is linked as the right child of the previous one */
  		__vma_link_rb(mm, tmp, rb_link, rb_parent);
  		rb_link = &tmp->vm_rb.rb_right;
  		rb_parent = &tmp->vm_rb;
  
  		mm->map_count++;
0b0db14c5   Hugh Dickins   [PATCH] unpaged: ...
425
  		retval = copy_page_range(mm, oldmm, mpnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
426
427
428
429
430
431
  
  		/* ->open() runs even when copy_page_range() failed */
  		if (tmp->vm_ops && tmp->vm_ops->open)
  			tmp->vm_ops->open(tmp);
  
  		if (retval)
  			goto out;
682968e0c   Srikar Dronamraju   uprobes/core: Opt...
432
433
434
  
  		/*
  		 * NOTE(review): retval is 0 at this point, so a uprobe_mmap()
  		 * failure leaves the loop early, skips arch_dup_mmap() and
  		 * still returns 0 - confirm this is intended.
  		 */
  		if (file && uprobe_mmap(tmp))
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
  	}
d6dd61c83   Jeremy Fitzhardinge   [PATCH] x86: PARA...
436
437
  	/* a new mm has just been created */
  	arch_dup_mmap(oldmm, mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
438
  	retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
439
  out:
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
440
  	up_write(&mm->mmap_sem);
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
441
  	flush_tlb_mm(oldmm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
443
  	up_write(&oldmm->mmap_sem);
  	return retval;
5beb49305   Rik van Riel   mm: change anon_v...
444
445
  fail_nomem_anon_vma_fork:
  	mpol_put(pol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
446
447
448
449
450
451
452
  fail_nomem_policy:
  	kmem_cache_free(vm_area_cachep, tmp);
  fail_nomem:
  	retval = -ENOMEM;
  	vm_unacct_memory(charge);
  	goto out;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
453
  /*
   * Allocate the page directory for @mm and store it in mm->pgd.
   * Returns 0 on success, -ENOMEM if pgd_alloc() returned NULL.
   */
  static inline int mm_alloc_pgd(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
454
455
456
457
458
459
  {
  	mm->pgd = pgd_alloc(mm);
  	if (unlikely(!mm->pgd))
  		return -ENOMEM;
  	return 0;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
460
  /* Release the page directory stored in mm->pgd via pgd_free(). */
  static inline void mm_free_pgd(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
  {
5e5419734   Benjamin Herrenschmidt   add mm argument t...
462
  	pgd_free(mm, mm->pgd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
463
464
465
466
467
468
  }
  #else
  #define dup_mmap(mm, oldmm)	(0)
  #define mm_alloc_pgd(mm)	(0)
  #define mm_free_pgd(mm)
  #endif /* CONFIG_MMU */
23ff44402   Daniel Walker   whitespace fixes:...
469
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470

e94b17660   Christoph Lameter   [PATCH] slab: rem...
471
  #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
472
  #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
4cb0e11b1   Hidehiro Kawai   coredump_filter: ...
473
474
475
476
477
478
479
480
481
482
483
  static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
  
  /*
   * Parse the "coredump_filter=" boot argument: convert @s with
   * simple_strtoul() (base 0), shift the value into the dump-filter bit
   * field and store the masked result in default_dump_filter.
   * Always returns 1.
   */
  static int __init coredump_filter_setup(char *s)
  {
  	unsigned long filter = simple_strtoul(s, NULL, 0);

  	filter <<= MMF_DUMP_FILTER_SHIFT;
  	filter &= MMF_DUMP_FILTER_MASK;
  	default_dump_filter = filter;

  	return 1;
  }
  
  __setup("coredump_filter=", coredump_filter_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
484
  #include <linux/init_task.h>
858f09930   Alexey Dobriyan   aio: ifdef fields...
485
486
487
488
489
490
491
  /*
   * Initialise the AIO bookkeeping of @mm (context list and its lock).
   * Compiles to nothing without CONFIG_AIO.
   */
  static void mm_init_aio(struct mm_struct *mm)
  {
  #ifdef CONFIG_AIO
  	INIT_HLIST_HEAD(&mm->ioctx_list);
  	spin_lock_init(&mm->ioctx_lock);
  #endif
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
492
  /*
   * Initialise the fields of an already allocated @mm and allocate its page
   * directory.  @p is passed to mm_init_owner().
   *
   * Returns @mm on success.  If the page directory cannot be allocated, @mm
   * is freed and NULL is returned, so the caller must not touch it again.
   */
  static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
493
494
495
496
497
  {
  	atomic_set(&mm->mm_users, 1);
  	atomic_set(&mm->mm_count, 1);
  	init_rwsem(&mm->mmap_sem);
  	INIT_LIST_HEAD(&mm->mmlist);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
498
499
  	/* inherit masked flags from current's mm, else use the boot default */
  	mm->flags = (current->mm) ?
  		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
999d9fc16   Oleg Nesterov   coredump: move mm...
500
  	mm->core_state = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
501
  	mm->nr_ptes = 0;
d559db086   KAMEZAWA Hiroyuki   mm: clean up mm_c...
502
  	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
503
  	spin_lock_init(&mm->page_table_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504
  	mm->free_area_cache = TASK_UNMAPPED_BASE;
1363c3cd8   Wolfgang Wander   [PATCH] Avoiding ...
505
  	mm->cached_hole_size = ~0UL;
858f09930   Alexey Dobriyan   aio: ifdef fields...
506
  	mm_init_aio(mm);
cf475ad28   Balbir Singh   cgroups: add an o...
507
  	mm_init_owner(mm, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
508
509
510
  
  	if (likely(!mm_alloc_pgd(mm))) {
  		mm->def_flags = 0;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
511
  		mmu_notifier_mm_init(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
512
513
  		return mm;
  	}
78fb74669   Pavel Emelianov   Memory controller...
514

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
515
516
517
  	/* pgd allocation failed: give the mm_struct back */
  	free_mm(mm);
  	return NULL;
  }
c3f0327f8   Konstantin Khlebnikov   mm: add rss count...
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
  /*
   * Sanity-check @mm's counters: print a KERN_ALERT line for every RSS
   * counter in mm->rss_stat that is non-zero, and (with transparent
   * hugepages) trigger VM_BUG_ON if mm->pmd_huge_pte is still set.
   */
  static void check_mm(struct mm_struct *mm)
  {
  	int i;

  	for (i = 0; i < NR_MM_COUNTERS; i++) {
  		long x = atomic_long_read(&mm->rss_stat.count[i]);

  		/* the message must end in an escaped "\n", not a raw line break */
  		if (unlikely(x))
  			printk(KERN_ALERT "BUG: Bad rss-counter state "
  					  "mm:%p idx:%d val:%ld\n", mm, i, x);
  	}

  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  	VM_BUG_ON(mm->pmd_huge_pte);
  #endif
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
535
536
537
  /*
   * Allocate and initialize an mm_struct.
   */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
538
  /*
   * Returns NULL if allocate_mm() fails; otherwise returns the result of
   * mm_init(mm, current), which is itself NULL on failure.
   */
  struct mm_struct *mm_alloc(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
539
  {
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
540
  	struct mm_struct *mm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
541
542
  
  	mm = allocate_mm();
de03c72cf   KOSAKI Motohiro   mm: convert mm->c...
543
544
545
546
  	if (!mm)
  		return NULL;
  
  	/* start from an all-zero structure before field initialisation */
  	memset(mm, 0, sizeof(*mm));
6345d24da   Linus Torvalds   mm: Fix boot cras...
547
548
  	mm_init_cpumask(mm);
  	return mm_init(mm, current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
550
551
552
553
554
555
  }
  
  /*
   * Called when the last reference to the mm
   * is dropped: either by a lazy thread or by
   * mmput. Free the page directory and the mm.
   */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
556
  /*
   * Free @mm itself: release the page directory, destroy the arch context
   * and mmu-notifier state, run the check_mm() sanity checks and return the
   * mm_struct to its cache.  Must never be called on init_mm (BUG).
   */
  void __mmdrop(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
558
559
560
  {
  	BUG_ON(mm == &init_mm);
  	mm_free_pgd(mm);
  	destroy_context(mm);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
561
  	mmu_notifier_mm_destroy(mm);
c3f0327f8   Konstantin Khlebnikov   mm: add rss count...
562
  	check_mm(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563
564
  	free_mm(mm);
  }
6d4e4c4fc   Avi Kivity   KVM: Disallow for...
565
  EXPORT_SYMBOL_GPL(__mmdrop);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
567
568
569
570
571
  
  /*
   * Decrement the use count and release all resources for an mm.
   *
   * Drops one mm_users reference.  When that was the last user, the uprobe,
   * AIO, KSM and khugepaged state is torn down, the mappings are removed,
   * the exe_file reference is dropped, the mm is unlinked from mmlist, the
   * binfmt module reference is released, and finally mmdrop() is called.
   * May sleep.
   */
  void mmput(struct mm_struct *mm)
  {
0ae26f1b3   Andrew Morton   [PATCH] mmput() m...
572
  	might_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
  	if (atomic_dec_and_test(&mm->mm_users)) {
d4b3b6384   Srikar Dronamraju   uprobes/core: All...
574
  		uprobe_clear_state(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
  		exit_aio(mm);
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
576
  		ksm_exit(mm);
ba76149f4   Andrea Arcangeli   thp: khugepaged
577
  		khugepaged_exit(mm); /* must run before exit_mmap */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
  		exit_mmap(mm);
925d1c401   Matt Helsley   procfs task exe s...
579
  		set_mm_exe_file(mm, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580
581
582
583
584
  		/* mmlist_lock is taken only if the mm is actually on a list */
  		if (!list_empty(&mm->mmlist)) {
  			spin_lock(&mmlist_lock);
  			list_del(&mm->mmlist);
  			spin_unlock(&mmlist_lock);
  		}
801460d0c   Hiroshi Shimamoto   task_struct clean...
585
586
  		if (mm->binfmt)
  			module_put(mm->binfmt->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
587
588
589
590
  		mmdrop(mm);
  	}
  }
  EXPORT_SYMBOL_GPL(mmput);
386460138   Jiri Slaby   mm: extract exe_f...
591
592
593
594
595
596
597
598
599
600
601
602
603
  /*
   * We added or removed a vma mapping the executable. The vmas are only mapped
   * during exec and are not mapped with the mmap system call.
   * Callers must hold down_write() on the mm's mmap_sem for these
   */
  /* Count one more vma mapping the executable in @mm. */
  void added_exe_file_vma(struct mm_struct *mm)
  {
  	mm->num_exe_file_vmas += 1;
  }
  
  /*
   * Count one vma fewer mapping the executable in @mm.  When the count
   * reaches zero and an exe_file is still recorded, drop the file reference
   * and clear mm->exe_file.
   */
  void removed_exe_file_vma(struct mm_struct *mm)
  {
  	mm->num_exe_file_vmas--;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
604
  	if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
386460138   Jiri Slaby   mm: extract exe_f...
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
  		fput(mm->exe_file);
  		mm->exe_file = NULL;
  	}
  
  }
  
  /*
   * Install @new_exe_file (may be NULL) as the mm's exe_file.  A reference
   * is taken on the new file, the reference on any previous file is
   * dropped, and the executable-vma counter is reset to zero.
   */
  void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  {
  	struct file *old_exe_file = mm->exe_file;
  
  	/* Pin the new file before letting go of the old one. */
  	if (new_exe_file)
  		get_file(new_exe_file);
  	if (old_exe_file)
  		fput(old_exe_file);
  
  	mm->exe_file = new_exe_file;
  	mm->num_exe_file_vmas = 0;
  }
  
  /*
   * Return the mm's exe_file with an extra reference taken via get_file(),
   * or NULL when none is set.
   */
  struct file *get_mm_exe_file(struct mm_struct *mm)
  {
  	struct file *file;
  
  	/*
  	 * mmap_sem is held for reading to protect against races with the
  	 * removal of VM_EXECUTABLE vmas.
  	 */
  	down_read(&mm->mmap_sem);
  	file = mm->exe_file;
  	if (file)
  		get_file(file);
  	up_read(&mm->mmap_sem);
  
  	return file;
  }
  
  /*
   * Give @newmm its own reference to @oldmm's exe_file (NULL if the old mm
   * has none).
   */
  static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
  {
  	struct file *exe_file = get_mm_exe_file(oldmm);
  
  	/*
  	 * No lock is taken on newmm for this store: this is called during
  	 * fork, when the task is not yet in /proc.
  	 */
  	newmm->exe_file = exe_file;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
641
642
643
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
644
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
645
646
647
648
649
650
651
652
653
654
655
656
   * this kernel workthread has transiently adopted a user mm with use_mm,
   * to do its AIO) is not set and if so returns a reference to it, after
   * bumping up the use count.  User must release the mm via mmput()
   * after use.  Typically used by /proc and ptrace.
   */
  struct mm_struct *get_task_mm(struct task_struct *task)
  {
  	struct mm_struct *mm;
  
  	task_lock(task);
  	mm = task->mm;
  	if (mm) {
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
657
  		if (task->flags & PF_KTHREAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
658
659
660
661
662
663
664
665
  			mm = NULL;
  		else
  			atomic_inc(&mm->mm_users);
  	}
  	task_unlock(task);
  	return mm;
  }
  EXPORT_SYMBOL_GPL(get_task_mm);
8cdb878dc   Christopher Yeoh   Fix race in proce...
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
  /*
   * Get a reference on @task's mm, subject to a ptrace_may_access() check.
   *
   * The lookup and the check run under the task's cred_guard_mutex.
   * Returns whatever get_task_mm() returned (possibly NULL), or an
   * ERR_PTR(): the error from mutex_lock_killable() if taking the mutex
   * failed, or -EACCES if ptrace_may_access() refuses @mode.  The check is
   * skipped when the mm is current's own.
   */
  struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  {
  	struct mm_struct *mm;
  	int ret;
  
  	ret = mutex_lock_killable(&task->signal->cred_guard_mutex);
  	if (ret)
  		return ERR_PTR(ret);
  
  	mm = get_task_mm(task);
  	if (mm && mm != current->mm) {
  		if (!ptrace_may_access(task, mode)) {
  			/* Denied: give the reference back. */
  			mmput(mm);
  			mm = ERR_PTR(-EACCES);
  		}
  	}
  
  	mutex_unlock(&task->signal->cred_guard_mutex);
  	return mm;
  }
57b59c4a1   Oleg Nesterov   coredump_wait: do...
685
  static void complete_vfork_done(struct task_struct *tsk)
c415c3b47   Oleg Nesterov   vfork: introduce ...
686
  {
d68b46fe1   Oleg Nesterov   vfork: make it ki...
687
  	struct completion *vfork;
c415c3b47   Oleg Nesterov   vfork: introduce ...
688

d68b46fe1   Oleg Nesterov   vfork: make it ki...
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
  	task_lock(tsk);
  	vfork = tsk->vfork_done;
  	if (likely(vfork)) {
  		tsk->vfork_done = NULL;
  		complete(vfork);
  	}
  	task_unlock(tsk);
  }
  
  /*
   * Wait (killably) for @vfork to be completed.  If the wait returns
   * non-zero, the child's vfork_done pointer is cleared under task_lock().
   * The reference on @child is dropped in either case.
   * Returns the result of wait_for_completion_killable().
   */
  static int wait_for_vfork_done(struct task_struct *child,
  				struct completion *vfork)
  {
  	int ret;
  
  	/* The wait is bracketed by freezer_do_not_count()/freezer_count(). */
  	freezer_do_not_count();
  	ret = wait_for_completion_killable(vfork);
  	freezer_count();
  
  	if (ret) {
  		task_lock(child);
  		child->vfork_done = NULL;
  		task_unlock(child);
  	}
  
  	put_task_struct(child);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
  /* Please note the differences between mmput and mm_release.
   * mmput is called whenever we stop holding onto a mm_struct,
   * error success whatever.
   *
   * mm_release is called after a mm_struct has been removed
   * from the current process.
   *
   * This difference is important for error handling, when we
   * only half set up a mm_struct for a new process and need to restore
   * the old one.  Because we mmput the new mm_struct before
   * restoring the old one. . .
   * Eric Biederman 10 January 1998
   */
  void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  {
8141c7f3e   Linus Torvalds   Move "exit_robust...
731
732
  	/* Get rid of any futexes when releasing the mm */
  #ifdef CONFIG_FUTEX
fc6b177de   Peter Zijlstra   futex: Nullify ro...
733
  	if (unlikely(tsk->robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
734
  		exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
735
736
  		tsk->robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
737
  #ifdef CONFIG_COMPAT
fc6b177de   Peter Zijlstra   futex: Nullify ro...
738
  	if (unlikely(tsk->compat_robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
739
  		compat_exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
740
741
  		tsk->compat_robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
742
  #endif
322a2c100   Thomas Gleixner   futex: Move exit_...
743
744
  	if (unlikely(!list_empty(&tsk->pi_state_list)))
  		exit_pi_state_list(tsk);
8141c7f3e   Linus Torvalds   Move "exit_robust...
745
  #endif
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
746
  	uprobe_free_utask(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
747
748
  	/* Get rid of any cached register state */
  	deactivate_mm(tsk, mm);
fec1d0115   Roland McGrath   [PATCH] Disable C...
749
750
751
752
  	/*
  	 * If we're exiting normally, clear a user-space tid field if
  	 * requested.  We leave this alone when dying by signal, to leave
  	 * the value intact in a core dump, and to save the unnecessary
d68b46fe1   Oleg Nesterov   vfork: make it ki...
753
754
  	 * trouble, say, a killed vfork parent shouldn't touch this mm.
  	 * Userland only wants this done for a sys_exit.
fec1d0115   Roland McGrath   [PATCH] Disable C...
755
  	 */
9c8a8228d   Eric Dumazet   execve: must clea...
756
757
758
759
760
761
762
763
764
765
766
  	if (tsk->clear_child_tid) {
  		if (!(tsk->flags & PF_SIGNALED) &&
  		    atomic_read(&mm->mm_users) > 1) {
  			/*
  			 * We don't check the error code - if userspace has
  			 * not set up a proper pointer then tough luck.
  			 */
  			put_user(0, tsk->clear_child_tid);
  			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
  					1, NULL, NULL, 0);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
767
  		tsk->clear_child_tid = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
768
  	}
f7505d64f   Konstantin Khlebnikov   fork: call comple...
769
770
771
772
773
774
775
  
  	/*
  	 * All done, finally we can wake up parent and return this mm to him.
  	 * Also kthread_stop() uses this completion for synchronization.
  	 */
  	if (tsk->vfork_done)
  		complete_vfork_done(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
776
  }
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
777
778
779
780
  /*
   * Allocate a new mm structure and copy contents from the
   * mm structure of the passed in task structure.
   */
402b08622   Carsten Otte   s390: KVM prepara...
781
  struct mm_struct *dup_mm(struct task_struct *tsk)
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
782
783
784
785
786
787
788
789
790
791
792
793
  {
  	struct mm_struct *mm, *oldmm = current->mm;
  	int err;
  
  	if (!oldmm)
  		return NULL;
  
  	mm = allocate_mm();
  	if (!mm)
  		goto fail_nomem;
  
  	memcpy(mm, oldmm, sizeof(*mm));
6345d24da   Linus Torvalds   mm: Fix boot cras...
794
  	mm_init_cpumask(mm);
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
795

e7a00c45f   Andrea Arcangeli   thp: add pmd_huge...
796
797
798
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  	mm->pmd_huge_pte = NULL;
  #endif
d4b3b6384   Srikar Dronamraju   uprobes/core: All...
799
  	uprobe_reset_state(mm);
e7a00c45f   Andrea Arcangeli   thp: add pmd_huge...
800

78fb74669   Pavel Emelianov   Memory controller...
801
  	if (!mm_init(mm, tsk))
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
802
803
804
805
  		goto fail_nomem;
  
  	if (init_new_context(tsk, mm))
  		goto fail_nocontext;
925d1c401   Matt Helsley   procfs task exe s...
806
  	dup_mm_exe_file(oldmm, mm);
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
807
808
809
810
811
812
  	err = dup_mmap(mm, oldmm);
  	if (err)
  		goto free_pt;
  
  	mm->hiwater_rss = get_mm_rss(mm);
  	mm->hiwater_vm = mm->total_vm;
801460d0c   Hiroshi Shimamoto   task_struct clean...
813
814
  	if (mm->binfmt && !try_module_get(mm->binfmt->module))
  		goto free_pt;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
815
816
817
  	return mm;
  
  free_pt:
801460d0c   Hiroshi Shimamoto   task_struct clean...
818
819
  	/* don't put binfmt in mmput, we haven't got module yet */
  	mm->binfmt = NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
820
821
822
823
824
825
826
827
828
829
830
831
832
833
  	mmput(mm);
  
  fail_nomem:
  	return NULL;
  
  fail_nocontext:
  	/*
  	 * If init_new_context() failed, we cannot use mmput() to free the mm
  	 * because it calls destroy_context()
  	 */
  	mm_free_pgd(mm);
  	free_mm(mm);
  	return NULL;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
834
  static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
835
  {
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
836
  	struct mm_struct *mm, *oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
837
838
839
840
  	int retval;
  
  	tsk->min_flt = tsk->maj_flt = 0;
  	tsk->nvcsw = tsk->nivcsw = 0;
17406b82d   Mandeep Singh Baines   softlockup: remov...
841
842
843
  #ifdef CONFIG_DETECT_HUNG_TASK
  	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
  
  	tsk->mm = NULL;
  	tsk->active_mm = NULL;
  
  	/*
  	 * Are we cloning a kernel thread?
  	 *
  	 * We need to steal a active VM for that..
  	 */
  	oldmm = current->mm;
  	if (!oldmm)
  		return 0;
  
  	if (clone_flags & CLONE_VM) {
  		atomic_inc(&oldmm->mm_users);
  		mm = oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
860
861
862
863
  		goto good_mm;
  	}
  
  	retval = -ENOMEM;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
864
  	mm = dup_mm(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
865
866
  	if (!mm)
  		goto fail_nomem;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
867
868
869
870
  good_mm:
  	tsk->mm = mm;
  	tsk->active_mm = mm;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
871
872
  fail_nomem:
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
873
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
874
  static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
875
  {
498052bba   Al Viro   New locking/refco...
876
  	struct fs_struct *fs = current->fs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
877
  	if (clone_flags & CLONE_FS) {
498052bba   Al Viro   New locking/refco...
878
  		/* tsk->fs is already what we want */
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
879
  		spin_lock(&fs->lock);
498052bba   Al Viro   New locking/refco...
880
  		if (fs->in_exec) {
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
881
  			spin_unlock(&fs->lock);
498052bba   Al Viro   New locking/refco...
882
883
884
  			return -EAGAIN;
  		}
  		fs->users++;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
885
  		spin_unlock(&fs->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
886
887
  		return 0;
  	}
498052bba   Al Viro   New locking/refco...
888
  	tsk->fs = copy_fs_struct(fs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
889
890
891
892
  	if (!tsk->fs)
  		return -ENOMEM;
  	return 0;
  }
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
893
  static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
a016f3389   JANAK DESAI   [PATCH] unshare s...
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
  {
  	struct files_struct *oldf, *newf;
  	int error = 0;
  
  	/*
  	 * A background process may not have any files ...
  	 */
  	oldf = current->files;
  	if (!oldf)
  		goto out;
  
  	if (clone_flags & CLONE_FILES) {
  		atomic_inc(&oldf->count);
  		goto out;
  	}
a016f3389   JANAK DESAI   [PATCH] unshare s...
909
910
911
912
913
914
915
916
917
  	newf = dup_fd(oldf, &error);
  	if (!newf)
  		goto out;
  
  	tsk->files = newf;
  	error = 0;
  out:
  	return error;
  }
fadad878c   Jens Axboe   kernel: add CLONE...
918
  /*
   * Set up the io context of the new task @tsk (only with CONFIG_BLOCK).
   * With CLONE_IO current's io context is linked to @tsk.  Otherwise, if
   * current's ioprio is valid, @tsk gets an io context of its own carrying
   * the same ioprio.  Returns 0 on success or -ENOMEM.
   */
  static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
  {
  #ifdef CONFIG_BLOCK
  	struct io_context *parent_ioc = current->io_context;
  	struct io_context *child_ioc;
  
  	if (!parent_ioc)
  		return 0;
  
  	/* Share the io context with the parent if CLONE_IO is set. */
  	if (clone_flags & CLONE_IO) {
  		ioc_task_link(parent_ioc);
  		tsk->io_context = parent_ioc;
  		return 0;
  	}
  
  	/* Without a valid ioprio there is nothing to carry over. */
  	if (!ioprio_valid(parent_ioc->ioprio))
  		return 0;
  
  	child_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
  	if (unlikely(!child_ioc))
  		return -ENOMEM;
  
  	child_ioc->ioprio = parent_ioc->ioprio;
  	/* Release the io context returned by get_task_io_context() above. */
  	put_io_context(child_ioc);
  #endif
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
942
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
943
944
  {
  	struct sighand_struct *sig;
60348802e   Zhaolei   fork.c: cleanup f...
945
  	if (clone_flags & CLONE_SIGHAND) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
946
947
948
949
  		atomic_inc(&current->sighand->count);
  		return 0;
  	}
  	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
e56d09031   Ingo Molnar   [PATCH] RCU signa...
950
  	rcu_assign_pointer(tsk->sighand, sig);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
952
  	if (!sig)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
953
954
955
956
  	atomic_set(&sig->count, 1);
  	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
  	return 0;
  }
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
957
  void __cleanup_sighand(struct sighand_struct *sighand)
c81addc9d   Oleg Nesterov   [PATCH] rename __...
958
  {
d80e731ec   Oleg Nesterov   epoll: introduce ...
959
960
  	if (atomic_dec_and_test(&sighand->count)) {
  		signalfd_cleanup(sighand);
c81addc9d   Oleg Nesterov   [PATCH] rename __...
961
  		kmem_cache_free(sighand_cachep, sighand);
d80e731ec   Oleg Nesterov   epoll: introduce ...
962
  	}
c81addc9d   Oleg Nesterov   [PATCH] rename __...
963
  }
f06febc96   Frank Mayhar   timers: fix itime...
964
965
966
967
968
969
  
  /*
   * Initialize POSIX timer handling for a thread group.
   */
  static void posix_cpu_timers_init_group(struct signal_struct *sig)
  {
78d7d407b   Jiri Slaby   kernel core: use ...
970
  	unsigned long cpu_limit;
f06febc96   Frank Mayhar   timers: fix itime...
971
972
  	/* Thread group counters. */
  	thread_group_cputime_init(sig);
78d7d407b   Jiri Slaby   kernel core: use ...
973
974
975
  	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
  	if (cpu_limit != RLIM_INFINITY) {
  		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
6279a751f   Oleg Nesterov   posix-timers: fix...
976
977
  		sig->cputimer.running = 1;
  	}
f06febc96   Frank Mayhar   timers: fix itime...
978
979
980
981
982
  	/* The timer lists. */
  	INIT_LIST_HEAD(&sig->cpu_timers[0]);
  	INIT_LIST_HEAD(&sig->cpu_timers[1]);
  	INIT_LIST_HEAD(&sig->cpu_timers[2]);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
983
  static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
984
985
  {
  	struct signal_struct *sig;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986

4ab6c0833   Oleg Nesterov   clone(): fix race...
987
  	if (clone_flags & CLONE_THREAD)
490dea45d   Peter Zijlstra   itimers: remove t...
988
  		return 0;
490dea45d   Peter Zijlstra   itimers: remove t...
989

a56704ef6   Veaceslav Falico   copy_signal() cle...
990
  	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
991
992
993
  	tsk->signal = sig;
  	if (!sig)
  		return -ENOMEM;
b3ac022cb   Oleg Nesterov   proc: turn signal...
994
  	sig->nr_threads = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
  	atomic_set(&sig->live, 1);
b3ac022cb   Oleg Nesterov   proc: turn signal...
996
  	atomic_set(&sig->sigcnt, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
997
  	init_waitqueue_head(&sig->wait_chldexit);
b3bfa0cba   Sukadev Bhattiprolu   signals: protect ...
998
999
  	if (clone_flags & CLONE_NEWPID)
  		sig->flags |= SIGNAL_UNKILLABLE;
db51aeccd   Oleg Nesterov   signals: microopt...
1000
  	sig->curr_target = tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1001
1002
  	init_sigpending(&sig->shared_pending);
  	INIT_LIST_HEAD(&sig->posix_timers);
c9cb2e3d7   Thomas Gleixner   [PATCH] hrtimers:...
1003
  	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1004
  	sig->real_timer.function = it_real_fn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1005

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1006
1007
1008
  	task_lock(current->group_leader);
  	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
  	task_unlock(current->group_leader);
6279a751f   Oleg Nesterov   posix-timers: fix...
1009
  	posix_cpu_timers_init_group(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
1010
  	tty_audit_fork(sig);
5091faa44   Mike Galbraith   sched: Add 'autog...
1011
  	sched_autogroup_fork(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
1012

4714d1d32   Ben Blum   cgroups: read-wri...
1013
  #ifdef CONFIG_CGROUPS
257058ae2   Tejun Heo   threadgroup: rena...
1014
  	init_rwsem(&sig->group_rwsem);
4714d1d32   Ben Blum   cgroups: read-wri...
1015
  #endif
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
1016
  	sig->oom_adj = current->signal->oom_adj;
a63d83f42   David Rientjes   oom: badness heur...
1017
  	sig->oom_score_adj = current->signal->oom_score_adj;
dabb16f63   Mandeep Singh Baines   oom: allow a non-...
1018
  	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
1019

ebec18a6d   Lennart Poettering   prctl: add PR_{SE...
1020
1021
  	sig->has_child_subreaper = current->signal->has_child_subreaper ||
  				   current->signal->is_child_subreaper;
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1022
  	mutex_init(&sig->cred_guard_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1023
1024
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1025
  static void copy_flags(unsigned long clone_flags, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026
1027
  {
  	unsigned long new_flags = p->flags;
21aa9af03   Tejun Heo   sched: add hooks ...
1028
  	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1029
  	new_flags |= PF_FORKNOEXEC;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1030
1031
  	p->flags = new_flags;
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
1032
  /*
   * set_tid_address system call: record @tidptr as the calling task's
   * clear_child_tid pointer and return the value of task_pid_vnr() for
   * the caller.
   */
  SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
  {
  	struct task_struct *me = current;
  
  	me->clear_child_tid = tidptr;
  	return task_pid_vnr(me);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1037
  static void rt_mutex_init_task(struct task_struct *p)
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1038
  {
1d6154825   Thomas Gleixner   sched: Convert pi...
1039
  	raw_spin_lock_init(&p->pi_lock);
e29e175b0   Zilvinas Valinskas   [PATCH] initialis...
1040
  #ifdef CONFIG_RT_MUTEXES
732375c6a   Dima Zavin   plist: Remove the...
1041
  	plist_head_init(&p->pi_waiters);
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1042
  	p->pi_blocked_on = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
1043
1044
  #endif
  }
cf475ad28   Balbir Singh   cgroups: add an o...
1045
1046
1047
1048
1049
1050
  #ifdef CONFIG_MM_OWNER
  /* Record @p as the owner of @mm (only built when CONFIG_MM_OWNER is set). */
  void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  {
  	mm->owner = p;
  }
  #endif /* CONFIG_MM_OWNER */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1051
  /*
   * Initialize POSIX CPU timer handling for a single task: all three
   * expiry values are zeroed and the three timer lists start out empty.
   */
  static void posix_cpu_timers_init(struct task_struct *tsk)
  {
  	int i;
  
  	tsk->cputime_expires.prof_exp = 0;
  	tsk->cputime_expires.virt_exp = 0;
  	tsk->cputime_expires.sched_exp = 0;
  
  	for (i = 0; i < 3; i++)
  		INIT_LIST_HEAD(&tsk->cpu_timers[i]);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1065
1066
1067
1068
1069
1070
1071
   * This creates a new process as a copy of the old one,
   * but does not actually start it yet.
   *
   * It copies the registers, and all the appropriate
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1072
1073
1074
1075
  static struct task_struct *copy_process(unsigned long clone_flags,
  					unsigned long stack_start,
  					struct pt_regs *regs,
  					unsigned long stack_size,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1076
  					int __user *child_tidptr,
09a05394f   Roland McGrath   tracehook: clone
1077
1078
  					struct pid *pid,
  					int trace)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1079
1080
  {
  	int retval;
a24efe62d   Mariusz Kozlowski   kernel/fork.c: re...
1081
  	struct task_struct *p;
b4f48b636   Paul Menage   Task Control Grou...
1082
  	int cgroup_callbacks_done = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
  
  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Thread groups must share signals as well, and detached threads
  	 * can only be started up within the thread group.
  	 */
  	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Shared signal handlers imply shared VM. By way of the above,
  	 * thread groups also imply shared VM. Blocking this case allows
  	 * for various simplifications in other code.
  	 */
  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
  		return ERR_PTR(-EINVAL);
123be07b0   Sukadev Bhattiprolu   fork(): disable C...
1101
1102
1103
1104
1105
1106
1107
1108
1109
  	/*
  	 * Siblings of global init remain as zombies on exit since they are
  	 * not reaped by their parent (swapper). To solve this and to avoid
  	 * multi-rooted process trees, prevent global and container-inits
  	 * from creating siblings.
  	 */
  	if ((clone_flags & CLONE_PARENT) &&
  				current->signal->flags & SIGNAL_UNKILLABLE)
  		return ERR_PTR(-EINVAL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1110
1111
1112
1113
1114
1115
1116
1117
  	retval = security_task_create(clone_flags);
  	if (retval)
  		goto fork_out;
  
  	retval = -ENOMEM;
  	p = dup_task_struct(current);
  	if (!p)
  		goto fork_out;
f7e8b616e   Steven Rostedt   function-graph: m...
1118
  	ftrace_graph_init_task(p);
e2cfabdfd   Will Drewry   seccomp: add syst...
1119
  	get_seccomp_filter(p);
f7e8b616e   Steven Rostedt   function-graph: m...
1120

bea493a03   Peter Zijlstra   [PATCH] rt-mutex:...
1121
  	rt_mutex_init_task(p);
d12c1a379   Ingo Molnar   lockdep: fix kern...
1122
  #ifdef CONFIG_PROVE_LOCKING
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1123
1124
1125
  	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
  	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1126
  	retval = -EAGAIN;
3b11a1dec   David Howells   CRED: Differentia...
1127
  	if (atomic_read(&p->real_cred->user->processes) >=
78d7d407b   Jiri Slaby   kernel core: use ...
1128
  			task_rlimit(p, RLIMIT_NPROC)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1129
  		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
18b6e0414   Serge Hallyn   User namespaces: ...
1130
  		    p->real_cred->user != INIT_USER)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1131
1132
  			goto bad_fork_free;
  	}
72fa59970   Vasiliy Kulikov   move RLIMIT_NPROC...
1133
  	current->flags &= ~PF_NPROC_EXCEEDED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134

f1752eec6   David Howells   CRED: Detach the ...
1135
1136
1137
  	retval = copy_creds(p, clone_flags);
  	if (retval < 0)
  		goto bad_fork_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1138
1139
1140
1141
1142
1143
  
  	/*
  	 * If multiple threads are within copy_process(), then this check
  	 * triggers too late. This doesn't hurt, the check is only there
  	 * to stop root fork bombs.
  	 */
04ec93fe9   Li Zefan   fork.c: fix NULL ...
1144
  	retval = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1145
1146
  	if (nr_threads >= max_threads)
  		goto bad_fork_cleanup_count;
a1261f546   Al Viro   [PATCH] m68k: int...
1147
  	if (!try_module_get(task_thread_info(p)->exec_domain->module))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1148
  		goto bad_fork_cleanup_count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149
  	p->did_exec = 0;
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
1150
  	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1151
  	copy_flags(clone_flags, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1152
1153
  	INIT_LIST_HEAD(&p->children);
  	INIT_LIST_HEAD(&p->sibling);
f41d911f8   Paul E. McKenney   rcu: Merge preemp...
1154
  	rcu_copy_process(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1155
1156
  	p->vfork_done = NULL;
  	spin_lock_init(&p->alloc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1157

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1158
  	init_sigpending(&p->pending);
648616343   Martin Schwidefsky   [S390] cputime: a...
1159
1160
  	p->utime = p->stime = p->gtime = 0;
  	p->utimescaled = p->stimescaled = 0;
d99ca3b97   Hidetoshi Seto   sched, cputime: C...
1161
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING
648616343   Martin Schwidefsky   [S390] cputime: a...
1162
  	p->prev_utime = p->prev_stime = 0;
d99ca3b97   Hidetoshi Seto   sched, cputime: C...
1163
  #endif
a3a2e76c7   KAMEZAWA Hiroyuki   mm: avoid null-po...
1164
1165
1166
  #if defined(SPLIT_RSS_COUNTING)
  	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
  #endif
172ba844a   Balbir Singh   sched: update del...
1167

6976675d9   Arjan van de Ven   hrtimer: create a...
1168
  	p->default_timer_slack_ns = current->timer_slack_ns;
5995477ab   Andrea Righi   task IO accountin...
1169
  	task_io_accounting_init(&p->ioac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1170
  	acct_clear_integrals(p);
f06febc96   Frank Mayhar   timers: fix itime...
1171
  	posix_cpu_timers_init(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1172

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1173
  	do_posix_clock_monotonic_gettime(&p->start_time);
924b42d5a   Tomas Janousek   Use boot based ti...
1174
1175
  	p->real_start_time = p->start_time;
  	monotonic_to_bootbased(&p->real_start_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1176
  	p->io_context = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177
  	p->audit_context = NULL;
4714d1d32   Ben Blum   cgroups: read-wri...
1178
  	if (clone_flags & CLONE_THREAD)
257058ae2   Tejun Heo   threadgroup: rena...
1179
  		threadgroup_change_begin(current);
b4f48b636   Paul Menage   Task Control Grou...
1180
  	cgroup_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1181
  #ifdef CONFIG_NUMA
846a16bf0   Lee Schermerhorn   mempolicy: rename...
1182
  	p->mempolicy = mpol_dup(p->mempolicy);
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1183
1184
1185
1186
1187
  	if (IS_ERR(p->mempolicy)) {
  		retval = PTR_ERR(p->mempolicy);
  		p->mempolicy = NULL;
  		goto bad_fork_cleanup_cgroup;
  	}
c61afb181   Paul Jackson   [PATCH] cpuset me...
1188
  	mpol_fix_fork_child_flag(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1189
  #endif
778d3b0ff   Michal Hocko   cpusets: randomiz...
1190
1191
1192
  #ifdef CONFIG_CPUSETS
  	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
  	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
cc9a6c877   Mel Gorman   cpuset: mm: reduc...
1193
  	seqcount_init(&p->mems_allowed_seq);
778d3b0ff   Michal Hocko   cpusets: randomiz...
1194
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1195
1196
  #ifdef CONFIG_TRACE_IRQFLAGS
  	p->irq_events = 0;
b36e4758d   Russell King   [ARM] Fix kernel/...
1197
1198
1199
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
  	p->hardirqs_enabled = 1;
  #else
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1200
  	p->hardirqs_enabled = 0;
b36e4758d   Russell King   [ARM] Fix kernel/...
1201
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
  	p->hardirq_enable_ip = 0;
  	p->hardirq_enable_event = 0;
  	p->hardirq_disable_ip = _THIS_IP_;
  	p->hardirq_disable_event = 0;
  	p->softirqs_enabled = 1;
  	p->softirq_enable_ip = _THIS_IP_;
  	p->softirq_enable_event = 0;
  	p->softirq_disable_ip = 0;
  	p->softirq_disable_event = 0;
  	p->hardirq_context = 0;
  	p->softirq_context = 0;
  #endif
fbb9ce953   Ingo Molnar   [PATCH] lockdep: ...
1214
1215
1216
1217
1218
  #ifdef CONFIG_LOCKDEP
  	p->lockdep_depth = 0; /* no locks held yet */
  	p->curr_chain_key = 0;
  	p->lockdep_recursion = 0;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1219

408894ee4   Ingo Molnar   [PATCH] mutex sub...
1220
1221
1222
  #ifdef CONFIG_DEBUG_MUTEXES
  	p->blocked_on = NULL; /* not blocked yet */
  #endif
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
1223
1224
1225
1226
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  	p->memcg_batch.do_batch = 0;
  	p->memcg_batch.memcg = NULL;
  #endif
0f4814065   Markus Metzger   x86, ptrace: add ...
1227

3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1228
  	/* Perform scheduler related setup. Assign this task to a CPU. */
3e51e3edf   Samir Bellabes   sched: Remove unu...
1229
  	sched_fork(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1230

cdd6c482c   Ingo Molnar   perf: Do the big ...
1231
  	retval = perf_event_init_task(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1232
1233
  	if (retval)
  		goto bad_fork_cleanup_policy;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1234
1235
  	retval = audit_alloc(p);
  	if (retval)
f1752eec6   David Howells   CRED: Detach the ...
1236
  		goto bad_fork_cleanup_policy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1237
  	/* copy all the process information */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1238
1239
  	retval = copy_semundo(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1240
  		goto bad_fork_cleanup_audit;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1241
1242
  	retval = copy_files(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1243
  		goto bad_fork_cleanup_semundo;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1244
1245
  	retval = copy_fs(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1246
  		goto bad_fork_cleanup_files;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1247
1248
  	retval = copy_sighand(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1249
  		goto bad_fork_cleanup_fs;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1250
1251
  	retval = copy_signal(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1252
  		goto bad_fork_cleanup_sighand;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1253
1254
  	retval = copy_mm(clone_flags, p);
  	if (retval)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
  		goto bad_fork_cleanup_signal;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1256
1257
  	retval = copy_namespaces(clone_flags, p);
  	if (retval)
d84f4f992   David Howells   CRED: Inaugurate ...
1258
  		goto bad_fork_cleanup_mm;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1259
1260
  	retval = copy_io(clone_flags, p);
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1261
  		goto bad_fork_cleanup_namespaces;
6f2c55b84   Alexey Dobriyan   Simplify copy_thr...
1262
  	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1263
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1264
  		goto bad_fork_cleanup_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1265

425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1266
1267
  	if (pid != &init_struct_pid) {
  		retval = -ENOMEM;
61bce0f13   Eric W. Biederman   pid: generalize t...
1268
  		pid = alloc_pid(p->nsproxy->pid_ns);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1269
  		if (!pid)
fd0928df9   Jens Axboe   ioprio: move io p...
1270
  			goto bad_fork_cleanup_io;
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1271
1272
1273
1274
1275
1276
  	}
  
  	p->pid = pid_nr(pid);
  	p->tgid = p->pid;
  	if (clone_flags & CLONE_THREAD)
  		p->tgid = current->tgid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1277
1278
1279
1280
  	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
  	/*
  	 * Clear TID on mm_release()?
  	 */
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1281
  	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
73c101011   Jens Axboe   block: initial pa...
1282
1283
1284
  #ifdef CONFIG_BLOCK
  	p->plug = NULL;
  #endif
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1285
  #ifdef CONFIG_FUTEX
8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1286
1287
1288
1289
  	p->robust_list = NULL;
  #ifdef CONFIG_COMPAT
  	p->compat_robust_list = NULL;
  #endif
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1290
1291
  	INIT_LIST_HEAD(&p->pi_state_list);
  	p->pi_state_cache = NULL;
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1292
  #endif
0326f5a94   Srikar Dronamraju   uprobes/core: Han...
1293
  	uprobe_copy_process(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1294
  	/*
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1295
1296
1297
1298
1299
1300
  	 * sigaltstack should be cleared when sharing the same VM
  	 */
  	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
  		p->sas_ss_sp = p->sas_ss_size = 0;
  
  	/*
6580807da   Oleg Nesterov   ptrace: copy_proc...
1301
1302
  	 * Syscall tracing and stepping should be turned off in the
  	 * child regardless of CLONE_PTRACE.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1303
  	 */
6580807da   Oleg Nesterov   ptrace: copy_proc...
1304
  	user_disable_single_step(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1305
  	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
ed75e8d58   Laurent Vivier   [PATCH] UML Suppo...
1306
1307
1308
  #ifdef TIF_SYSCALL_EMU
  	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
  #endif
9745512ce   Arjan van de Ven   sched: latencytop...
1309
  	clear_all_latency_tracing(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1310

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1311
  	/* ok, now we should be set up.. */
5f8aadd8b   Oleg Nesterov   CLONE_PARENT shou...
1312
1313
1314
1315
1316
1317
  	if (clone_flags & CLONE_THREAD)
  		p->exit_signal = -1;
  	else if (clone_flags & CLONE_PARENT)
  		p->exit_signal = current->group_leader->exit_signal;
  	else
  		p->exit_signal = (clone_flags & CSIGNAL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1318
1319
  	p->pdeath_signal = 0;
  	p->exit_state = 0;
9d823e8f6   Wu Fengguang   writeback: per ta...
1320
1321
  	p->nr_dirtied = 0;
  	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
83712358b   Wu Fengguang   writeback: dirty ...
1322
  	p->dirty_paused_when = 0;
9d823e8f6   Wu Fengguang   writeback: per ta...
1323

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1324
1325
1326
1327
1328
  	/*
  	 * Ok, make it visible to the rest of the system.
  	 * We don't wake it up yet.
  	 */
  	p->group_leader = p;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1329
  	INIT_LIST_HEAD(&p->thread_group);
e73f8959a   Oleg Nesterov   task_work_add: ge...
1330
  	INIT_HLIST_HEAD(&p->task_works);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1331

b4f48b636   Paul Menage   Task Control Grou...
1332
1333
1334
1335
1336
  	/* Now that the task is set up, run cgroup callbacks if
  	 * necessary. We need to run them before the task is visible
  	 * on the tasklist. */
  	cgroup_fork_callbacks(p);
  	cgroup_callbacks_done = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1337
1338
  	/* Need tasklist lock for parent etc handling! */
  	write_lock_irq(&tasklist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1339
  	/* CLONE_PARENT re-uses the old parent */
2d5516cbb   Oleg Nesterov   copy_process: fix...
1340
  	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1341
  		p->real_parent = current->real_parent;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1342
1343
  		p->parent_exec_id = current->parent_exec_id;
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1344
  		p->real_parent = current;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1345
1346
  		p->parent_exec_id = current->self_exec_id;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1347

3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1348
  	spin_lock(&current->sighand->siglock);
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1349
1350
1351
1352
1353
1354
1355
1356
  
  	/*
  	 * Process group and session signals need to be delivered to just the
  	 * parent before the fork or both the parent and the child after the
  	 * fork. Restart if a signal comes in before we add the new process to
  	 * its process group.
  	 * A fatal signal pending means that current will exit, so the new
  	 * thread can't slip out of an OOM kill (or normal SIGKILL).
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1357
  	*/
23ff44402   Daniel Walker   whitespace fixes:...
1358
  	recalc_sigpending();
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1359
1360
1361
1362
  	if (signal_pending(current)) {
  		spin_unlock(&current->sighand->siglock);
  		write_unlock_irq(&tasklist_lock);
  		retval = -ERESTARTNOINTR;
f7e8b616e   Steven Rostedt   function-graph: m...
1363
  		goto bad_fork_free_pid;
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1364
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1365
  	if (clone_flags & CLONE_THREAD) {
b3ac022cb   Oleg Nesterov   proc: turn signal...
1366
  		current->signal->nr_threads++;
4ab6c0833   Oleg Nesterov   clone(): fix race...
1367
  		atomic_inc(&current->signal->live);
b3ac022cb   Oleg Nesterov   proc: turn signal...
1368
  		atomic_inc(&current->signal->sigcnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1369
  		p->group_leader = current->group_leader;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1370
  		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1371
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1372
  	if (likely(p->pid)) {
4b9d33e6d   Tejun Heo   ptrace: kill clon...
1373
  		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1374
1375
  
  		if (thread_group_leader(p)) {
45a68628d   Eric W. Biederman   pid: remove the c...
1376
  			if (is_child_reaper(pid))
30e49c263   Pavel Emelyanov   pid namespaces: a...
1377
  				p->nsproxy->pid_ns->child_reaper = p;
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1378

fea9d1755   Oleg Nesterov   ITIMER_REAL: conv...
1379
  			p->signal->leader_pid = pid;
9c9f4ded9   Alan Cox   tty: Add a kref c...
1380
  			p->signal->tty = tty_kref_get(current->signal->tty);
5cd17569f   Eric W. Biederman   fix clone(CLONE_N...
1381
1382
  			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
  			attach_pid(p, PIDTYPE_SID, task_session(current));
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1383
  			list_add_tail(&p->sibling, &p->real_parent->children);
5e85d4abe   Eric W. Biederman   [PATCH] task: Mak...
1384
  			list_add_tail_rcu(&p->tasks, &init_task.tasks);
909ea9646   Christoph Lameter   core: Replace __g...
1385
  			__this_cpu_inc(process_counts);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1386
  		}
85868995d   Sukadev Bhattiprolu   Use struct pid pa...
1387
  		attach_pid(p, PIDTYPE_PID, pid);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1388
  		nr_threads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1389
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1390
  	total_forks++;
3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1391
  	spin_unlock(&current->sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1392
  	write_unlock_irq(&tasklist_lock);
c13cf856c   Andrew Morton   [PATCH] fork.c: p...
1393
  	proc_fork_connector(p);
817929ec2   Paul Menage   Task Control Grou...
1394
  	cgroup_post_fork(p);
4714d1d32   Ben Blum   cgroups: read-wri...
1395
  	if (clone_flags & CLONE_THREAD)
257058ae2   Tejun Heo   threadgroup: rena...
1396
  		threadgroup_change_end(current);
cdd6c482c   Ingo Molnar   perf: Do the big ...
1397
  	perf_event_fork(p);
43d2b1132   KAMEZAWA Hiroyuki   tracepoint: add t...
1398
1399
  
  	trace_task_newtask(p, clone_flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400
  	return p;
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1401
1402
1403
  bad_fork_free_pid:
  	if (pid != &init_struct_pid)
  		free_pid(pid);
fd0928df9   Jens Axboe   ioprio: move io p...
1404
  bad_fork_cleanup_io:
b69f22920   Louis Rilling   block: Fix io_con...
1405
1406
  	if (p->io_context)
  		exit_io_context(p);
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1407
  bad_fork_cleanup_namespaces:
5e2bf0142   Mike Galbraith   namespaces, pid_n...
1408
1409
  	if (unlikely(clone_flags & CLONE_NEWPID))
  		pid_ns_release_proc(p->nsproxy->pid_ns);
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1410
  	exit_task_namespaces(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1411
  bad_fork_cleanup_mm:
c9f01245b   David Rientjes   oom: remove oom_d...
1412
  	if (p->mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1413
1414
  		mmput(p->mm);
  bad_fork_cleanup_signal:
4ab6c0833   Oleg Nesterov   clone(): fix race...
1415
  	if (!(clone_flags & CLONE_THREAD))
1c5354de9   Mike Galbraith   sched: Move sched...
1416
  		free_signal_struct(p->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1417
  bad_fork_cleanup_sighand:
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1418
  	__cleanup_sighand(p->sighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1419
1420
1421
1422
1423
1424
1425
1426
  bad_fork_cleanup_fs:
  	exit_fs(p); /* blocking */
  bad_fork_cleanup_files:
  	exit_files(p); /* blocking */
  bad_fork_cleanup_semundo:
  	exit_sem(p);
  bad_fork_cleanup_audit:
  	audit_free(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1427
  bad_fork_cleanup_policy:
cdd6c482c   Ingo Molnar   perf: Do the big ...
1428
  	perf_event_free_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1429
  #ifdef CONFIG_NUMA
f0be3d32b   Lee Schermerhorn   mempolicy: rename...
1430
  	mpol_put(p->mempolicy);
b4f48b636   Paul Menage   Task Control Grou...
1431
  bad_fork_cleanup_cgroup:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1432
  #endif
4714d1d32   Ben Blum   cgroups: read-wri...
1433
  	if (clone_flags & CLONE_THREAD)
257058ae2   Tejun Heo   threadgroup: rena...
1434
  		threadgroup_change_end(current);
b4f48b636   Paul Menage   Task Control Grou...
1435
  	cgroup_exit(p, cgroup_callbacks_done);
35df17c57   Shailabh Nagar   [PATCH] task dela...
1436
  	delayacct_tsk_free(p);
a1261f546   Al Viro   [PATCH] m68k: int...
1437
  	module_put(task_thread_info(p)->exec_domain->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1438
  bad_fork_cleanup_count:
d84f4f992   David Howells   CRED: Inaugurate ...
1439
  	atomic_dec(&p->cred->user->processes);
e0e817392   David Howells   CRED: Add some co...
1440
  	exit_creds(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1441
1442
  bad_fork_free:
  	free_task(p);
fe7d37d1f   Oleg Nesterov   [PATCH] copy_proc...
1443
1444
  fork_out:
  	return ERR_PTR(retval);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1445
  }
6b2fb3c65   Adrian Bunk   idle_regs() must ...
1446
  /*
   * Weak default for idle_regs(): zero the caller-supplied register block
   * and hand the same pointer back.
   */
  noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
  {
  	/* memset() returns its first argument, i.e. regs itself. */
  	return memset(regs, 0, sizeof(*regs));
  }
f106eee10   Oleg Nesterov   pids: fix fork_id...
1451
1452
1453
1454
1455
1456
1457
1458
1459
  static inline void init_idle_pids(struct pid_link *links)
  {
  	enum pid_type type;
  
  	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
  		INIT_HLIST_NODE(&links[type].node); /* not really needed */
  		links[type].pid = &init_struct_pid;
  	}
  }
9abcf40b1   Al Viro   [PATCH] fork_idle...
1460
  /*
   * Build a task via copy_process() using init_struct_pid and zeroed
   * registers, then set it up as the idle task for @cpu.
   *
   * Returns the new task, or the ERR_PTR() value produced by copy_process().
   */
  struct task_struct * __cpuinit fork_idle(int cpu)
  {
  	struct pt_regs regs;
  	struct task_struct *idle;
  
  	idle = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
  			    &init_struct_pid, 0);
  	if (IS_ERR(idle))
  		return idle;
  
  	init_idle_pids(idle->pids);
  	init_idle(idle, cpu);
  
  	return idle;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
  /*
   * do_fork - the main fork routine.
   *
   * Checks the clone flags, works out which ptrace event (if any) has to be
   * reported, creates the child with copy_process() and kick-starts it.
   * With CLONE_VFORK the caller also waits until the child is done with the
   * VM.
   *
   * @clone_flags:   CLONE_* flags plus the exit signal in CSIGNAL
   * @stack_start:   passed through to copy_process()
   * @regs:          register state of the caller
   * @stack_size:    passed through to copy_process()
   * @parent_tidptr: user address that receives the child id when
   *                 CLONE_PARENT_SETTID is set
   * @child_tidptr:  passed through to copy_process()
   *
   * Returns task_pid_vnr() of the child on success, otherwise -EINVAL,
   * -EPERM or the error encoded in copy_process()'s return value.
   */
  long do_fork(unsigned long clone_flags,
  	      unsigned long stack_start,
  	      struct pt_regs *regs,
  	      unsigned long stack_size,
  	      int __user *parent_tidptr,
  	      int __user *child_tidptr)
  {
  	struct completion vfork;
  	struct task_struct *p;
  	int trace = 0;
  	long nr;
  
  	/*
  	 * Preliminary argument and permission checks, done before anything
  	 * gets allocated.
  	 */
  	if (clone_flags & CLONE_NEWUSER) {
  		if (clone_flags & CLONE_THREAD)
  			return -EINVAL;
  		/*
  		 * hopefully this check will go away when userns support is
  		 * complete
  		 */
  		if (!(capable(CAP_SYS_ADMIN) && capable(CAP_SETUID) &&
  		      capable(CAP_SETGID)))
  			return -EPERM;
  	}
  
  	/*
  	 * Pick the event to report to the ptracer.  Nothing is reported when
  	 * called from kernel_thread or when CLONE_UNTRACED is requested;
  	 * otherwise the event matching the kind of fork is reported, but
  	 * only if it is enabled.
  	 */
  	if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
  		int event;
  
  		if (clone_flags & CLONE_VFORK)
  			event = PTRACE_EVENT_VFORK;
  		else if ((clone_flags & CSIGNAL) != SIGCHLD)
  			event = PTRACE_EVENT_CLONE;
  		else
  			event = PTRACE_EVENT_FORK;
  
  		if (unlikely(ptrace_event_enabled(current, event)))
  			trace = event;
  	}
  
  	p = copy_process(clone_flags, stack_start, regs, stack_size,
  			 child_tidptr, NULL, trace);
  	if (IS_ERR(p))
  		return PTR_ERR(p);
  
  	/*
  	 * Everything that reads from p has to happen before the child is
  	 * woken up - the pointer might become invalid afterwards if the
  	 * thread exits quickly.
  	 */
  	trace_sched_process_fork(current, p);
  	nr = task_pid_vnr(p);
  
  	if (clone_flags & CLONE_PARENT_SETTID)
  		put_user(nr, parent_tidptr);
  
  	if (clone_flags & CLONE_VFORK) {
  		p->vfork_done = &vfork;
  		init_completion(&vfork);
  		get_task_struct(p);
  	}
  
  	wake_up_new_task(p);
  
  	/* forking complete and child started to run, tell ptracer */
  	if (unlikely(trace))
  		ptrace_event(trace, nr);
  
  	if (clone_flags & CLONE_VFORK) {
  		if (!wait_for_vfork_done(p, &vfork))
  			ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
  	}
  
  	return nr;
  }
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
1557
1558
1559
  /*
   * Fallback used when ARCH_MIN_MMSTRUCT_ALIGN has not been defined earlier.
   * The value is passed as the alignment argument when the mm_struct slab
   * cache is created in proc_caches_init().
   */
  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
  #define ARCH_MIN_MMSTRUCT_ALIGN 0
  #endif
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
1560
  static void sighand_ctor(void *data)
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1561
1562
  {
  	struct sighand_struct *sighand = data;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
1563
  	spin_lock_init(&sighand->siglock);
b8fceee17   Davide Libenzi   signalfd simplifi...
1564
  	init_waitqueue_head(&sighand->signalfd_wqh);
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1565
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1566
1567
1568
1569
  /*
   * Create the slab caches for sighand, signal, files, fs, mm and vm_area
   * structures, then run mmap_init() and nsproxy_cache_init().
   */
  void __init proc_caches_init(void)
  {
  	/* Flags shared by every cache below except vm_area_struct. */
  	const unsigned long base_flags =
  		SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_NOTRACK;
  
  	sighand_cachep = kmem_cache_create("sighand_cache",
  					   sizeof(struct sighand_struct), 0,
  					   base_flags | SLAB_DESTROY_BY_RCU,
  					   sighand_ctor);
  	signal_cachep = kmem_cache_create("signal_cache",
  					  sizeof(struct signal_struct), 0,
  					  base_flags, NULL);
  	files_cachep = kmem_cache_create("files_cache",
  					 sizeof(struct files_struct), 0,
  					 base_flags, NULL);
  	fs_cachep = kmem_cache_create("fs_cache",
  				      sizeof(struct fs_struct), 0,
  				      base_flags, NULL);
  	/*
  	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
  	 * whole struct cpumask for the OFFSTACK case. We could change
  	 * this to *only* allocate as much of it as required by the
  	 * maximum number of CPU's we can ever have.  The cpumask_allocation
  	 * is at the end of the structure, exactly for that reason.
  	 */
  	mm_cachep = kmem_cache_create("mm_struct",
  				      sizeof(struct mm_struct),
  				      ARCH_MIN_MMSTRUCT_ALIGN,
  				      base_flags, NULL);
  	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
  
  	mmap_init();
  	nsproxy_cache_init();
  }
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1595

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1596
  /*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1597
   * Check constraints on flags passed to the unshare system call.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1598
   */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1599
  static int check_unshare_flags(unsigned long unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1600
  {
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1601
1602
1603
1604
  	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
  				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
  				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
  		return -EINVAL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1605
  	/*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1606
1607
1608
  	 * Not implemented, but pretend it works if there is nothing to
  	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
  	 * needs to unshare vm.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1609
  	 */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1610
1611
1612
1613
1614
  	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
  		/* FIXME: get_task_mm() increments ->mm_users */
  		if (atomic_read(&current->mm->mm_users) > 1)
  			return -EINVAL;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1615
1616
1617
1618
1619
  
  	return 0;
  }
  
  /*
99d1419d9   JANAK DESAI   [PATCH] unshare s...
1620
   * Unshare the filesystem structure if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1621
1622
1623
1624
   */
  static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  {
  	struct fs_struct *fs = current->fs;
498052bba   Al Viro   New locking/refco...
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
  	if (!(unshare_flags & CLONE_FS) || !fs)
  		return 0;
  
  	/* don't need lock here; in the worst case we'll do useless copy */
  	if (fs->users == 1)
  		return 0;
  
  	*new_fsp = copy_fs_struct(fs);
  	if (!*new_fsp)
  		return -ENOMEM;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1635
1636
1637
1638
1639
  
  	return 0;
  }
  
  /*
a016f3389   JANAK DESAI   [PATCH] unshare s...
1640
   * Unshare file descriptor table if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1641
1642
1643
1644
   */
  static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  {
  	struct files_struct *fd = current->files;
a016f3389   JANAK DESAI   [PATCH] unshare s...
1645
  	int error = 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1646
1647
  
  	if ((unshare_flags & CLONE_FILES) &&
a016f3389   JANAK DESAI   [PATCH] unshare s...
1648
1649
1650
1651
1652
  	    (fd && atomic_read(&fd->count) > 1)) {
  		*new_fdp = dup_fd(fd, &error);
  		if (!*new_fdp)
  			return error;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1653
1654
1655
1656
1657
  
  	return 0;
  }
  
  /*
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1658
1659
1660
1661
1662
1663
1664
   * unshare allows a process to 'unshare' part of the process
   * context which was originally shared using clone.  copy_*
   * functions used by do_fork() cannot be used here directly
   * because they modify an inactive task_struct that is being
   * constructed. Here we are modifying the current, active,
   * task_struct.
   *
   * The flags are first validated by check_unshare_flags().  Private
   * copies of whatever is requested and currently shared (fs_struct,
   * files_struct, nsproxy) are then prepared and finally installed
   * into current.
   *
   * Returns 0 on success, otherwise the error reported by the helper
   * that failed.
   */
6559eed8c   Heiko Carstens   [CVE-2009-0029] S...
1665
  SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1666
  {
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1667
  	struct fs_struct *fs, *new_fs = NULL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1668
  	struct files_struct *fd, *new_fd = NULL;
cf7b708c8   Pavel Emelyanov   Make access to ta...
1669
  	struct nsproxy *new_nsproxy = NULL;
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1670
  	int do_sysvsem = 0;
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1671
  	int err;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1672

9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1673
1674
  	err = check_unshare_flags(unshare_flags);
  	if (err)
06f9d4f94   Eric W. Biederman   [PATCH] unshare: ...
1675
  		goto bad_unshare_out;
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
1676
  	/*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1677
1678
1679
1680
1681
  	 * If unsharing namespace, must also unshare filesystem information.
  	 */
  	if (unshare_flags & CLONE_NEWNS)
  		unshare_flags |= CLONE_FS;
  	/*
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
1682
1683
1684
1685
1686
  	 * CLONE_NEWIPC must also detach from the undolist: after switching
  	 * to a new ipc namespace, the semaphore arrays from the old
  	 * namespace are unreachable.
  	 */
  	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1687
  		do_sysvsem = 1;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1688
1689
  	err = unshare_fs(unshare_flags, &new_fs);
  	if (err)
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1690
  		goto bad_unshare_out;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1691
1692
  	err = unshare_fd(unshare_flags, &new_fd);
  	if (err)
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1693
  		goto bad_unshare_cleanup_fs;
fb0a685cb   Daniel Rebelo de Oliveira   kernel/fork.c: fi...
1694
1695
  	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
  	if (err)
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1696
  		goto bad_unshare_cleanup_fd;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1697

9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1698
  	/*
  	 * No error exits remain past this point: the prepared structures
  	 * are installed into current.  err is 0 here.
  	 */
  	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1699
1700
1701
1702
1703
1704
  		if (do_sysvsem) {
  			/*
  			 * CLONE_SYSVSEM is equivalent to sys_exit().
  			 */
  			exit_sem(current);
  		}
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1705

c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1706
  		if (new_nsproxy) {
cf7b708c8   Pavel Emelyanov   Make access to ta...
1707
1708
  			switch_task_namespaces(current, new_nsproxy);
  			new_nsproxy = NULL;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1709
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1710

cf7b708c8   Pavel Emelyanov   Make access to ta...
1711
  		task_lock(current);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1712
1713
  		if (new_fs) {
  			fs = current->fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1714
  			spin_lock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1715
  			current->fs = new_fs;
498052bba   Al Viro   New locking/refco...
1716
1717
1718
1719
  			/*
  			 * Drop the reference current held on the old
  			 * fs_struct.  new_fs is reused to carry the old
  			 * structure to the cleanup code at the end of the
  			 * function, but only when that was the last user.
  			 */
  			if (--fs->users)
  				new_fs = NULL;
  			else
  				new_fs = fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1720
  			spin_unlock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1721
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1722
1723
1724
1725
1726
1727
1728
1729
  		if (new_fd) {
  			fd = current->files;
  			current->files = new_fd;
  			/*
  			 * new_fd now refers to the old table, which the
  			 * cleanup code at the end of the function releases
  			 * with put_files_struct().
  			 */
  			new_fd = fd;
  		}
  
  		task_unlock(current);
  	}
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1730
  	if (new_nsproxy)
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1731
  		put_nsproxy(new_nsproxy);
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1732

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1733
1734
1735
  bad_unshare_cleanup_fd:
  	if (new_fd)
  		put_files_struct(new_fd);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1736
1737
  bad_unshare_cleanup_fs:
  	if (new_fs)
498052bba   Al Viro   New locking/refco...
1738
  		free_fs_struct(new_fs);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1739

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1740
1741
1742
  bad_unshare_out:
  	return err;
  }
3b1253880   Al Viro   [PATCH] sanitize ...
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
  
  /*
   *	Helper to unshare the files of the current task.
   *	We don't want to expose copy_files internals to
   *	the exec layer of the kernel.
   */
  
  int unshare_files(struct files_struct **displaced)
  {
  	struct task_struct *task = current;
50704516f   Al Viro   Fix uninitialized...
1753
  	struct files_struct *copy = NULL;
3b1253880   Al Viro   [PATCH] sanitize ...
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
  	int error;
  
  	error = unshare_fd(CLONE_FILES, &copy);
  	if (error || !copy) {
  		*displaced = NULL;
  		return error;
  	}
  	*displaced = task->files;
  	task_lock(task);
  	task->files = copy;
  	task_unlock(task);
  	return 0;
  }