Blame view

kernel/fork.c 42 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/kernel/fork.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   *  'fork.c' contains the help-routines for the 'fork' system call
   * (see also entry.S and others).
   * Fork is rather simple, once you get the hang of it, but the memory
   * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/unistd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
18
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/completion.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
  #include <linux/personality.h>
  #include <linux/mempolicy.h>
  #include <linux/sem.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
23
  #include <linux/fdtable.h>
da9cbc873   Jens Axboe   block: blkdev.h c...
24
  #include <linux/iocontext.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/key.h>
  #include <linux/binfmts.h>
  #include <linux/mman.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
28
  #include <linux/mmu_notifier.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  #include <linux/fs.h>
ab516013a   Serge E. Hallyn   [PATCH] namespace...
30
  #include <linux/nsproxy.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
31
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  #include <linux/cpu.h>
b4f48b636   Paul Menage   Task Control Grou...
33
  #include <linux/cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
  #include <linux/security.h>
a1e78772d   Mel Gorman   hugetlb: reserve ...
35
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
  #include <linux/swap.h>
  #include <linux/syscalls.h>
  #include <linux/jiffies.h>
09a05394f   Roland McGrath   tracehook: clone
39
  #include <linux/tracehook.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  #include <linux/futex.h>
8141c7f3e   Linus Torvalds   Move "exit_robust...
41
  #include <linux/compat.h>
207205a2b   Eric Dumazet   kthread: NUMA awa...
42
  #include <linux/kthread.h>
7c3ab7381   Andrew Morton   [PATCH] io-accoun...
43
  #include <linux/task_io_accounting_ops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
44
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
  #include <linux/ptrace.h>
  #include <linux/mount.h>
  #include <linux/audit.h>
78fb74669   Pavel Emelianov   Memory controller...
48
  #include <linux/memcontrol.h>
f201ae235   Frederic Weisbecker   tracing/function-...
49
  #include <linux/ftrace.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
50
51
  #include <linux/profile.h>
  #include <linux/rmap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
52
  #include <linux/ksm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
  #include <linux/acct.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
54
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
55
  #include <linux/cn_proc.h>
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
56
  #include <linux/freezer.h>
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
57
  #include <linux/delayacct.h>
ad4ecbcba   Shailabh Nagar   [PATCH] delay acc...
58
  #include <linux/taskstats_kern.h>
0a4254058   Arjan van de Ven   [PATCH] Add the c...
59
  #include <linux/random.h>
522ed7767   Miloslav Trmac   Audit: add TTY in...
60
  #include <linux/tty.h>
fd0928df9   Jens Axboe   ioprio: move io p...
61
  #include <linux/blkdev.h>
5ad4e53bd   Al Viro   Get rid of indire...
62
  #include <linux/fs_struct.h>
7c9f8861e   Eric Sandeen   stackprotector: u...
63
  #include <linux/magic.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
64
  #include <linux/perf_event.h>
42c4ab41a   Stanislaw Gruszka   itimers: Merge IT...
65
  #include <linux/posix-timers.h>
8e7cac798   Avi Kivity   core: Fix user re...
66
  #include <linux/user-return-notifier.h>
3d5992d2a   Ying Han   oom: add per-mm o...
67
  #include <linux/oom.h>
ba76149f4   Andrea Arcangeli   thp: khugepaged
68
  #include <linux/khugepaged.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
69
70
71
72
73
74
75
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
ad8d75fff   Steven Rostedt   tracing/events: m...
76
  #include <trace/events/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
78
79
80
81
82
83
84
85
  /*
   * Counters protected by write_lock_irq(&tasklist_lock)
   */
  unsigned long total_forks;	/* Handle normal Linux uptimes. */
  int nr_threads; 		/* The idle threads do not count.. */
  
  int max_threads;		/* tunable limit on nr_threads */
  
  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
c59923a15   Christoph Hellwig   [PATCH] remove th...
86
  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
87
88
89
90
91
92
93
94
  
  #ifdef CONFIG_PROVE_RCU
  /*
   * Report whether tasklist_lock is currently held, as seen by lockdep.
   * Returns the value of lockdep_is_held(&tasklist_lock) unchanged.
   */
  int lockdep_tasklist_lock_is_held(void)
  {
  	int held = lockdep_is_held(&tasklist_lock);
  
  	return held;
  }
  EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
  #endif /* #ifdef CONFIG_PROVE_RCU */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95
96
97
98
99
  
  int nr_processes(void)
  {
  	int cpu;
  	int total = 0;
1d5107509   Ian Campbell   Correct nr_proces...
100
  	for_each_possible_cpu(cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
101
102
103
104
105
106
  		total += per_cpu(process_counts, cpu);
  
  	return total;
  }
  
  #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
504f52b54   Eric Dumazet   mm: NUMA aware al...
107
108
109
110
  # define alloc_task_struct_node(node)		\
  		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
  # define free_task_struct(tsk)			\
  		kmem_cache_free(task_struct_cachep, (tsk))
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
111
  static struct kmem_cache *task_struct_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
  #endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
113
  #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
b6a84016b   Eric Dumazet   mm: NUMA aware al...
114
115
  static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
  						  int node)
b69c49b78   FUJITA Tomonori   clean up duplicat...
116
117
118
119
120
121
  {
  #ifdef CONFIG_DEBUG_STACK_USAGE
  	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
  #else
  	gfp_t mask = GFP_KERNEL;
  #endif
b6a84016b   Eric Dumazet   mm: NUMA aware al...
122
123
124
  	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
  
  	return page ? page_address(page) : NULL;
b69c49b78   FUJITA Tomonori   clean up duplicat...
125
126
127
128
129
130
131
  }
  
  /*
   * Release the pages backing a thread_info: the address of @ti is handed
   * to free_pages() together with the order THREAD_SIZE_ORDER.
   */
  static inline void free_thread_info(struct thread_info *ti)
  {
  	unsigned long stack_addr = (unsigned long)ti;
  
  	free_pages(stack_addr, THREAD_SIZE_ORDER);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132
  /* SLAB cache for signal_struct structures (tsk->signal) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
133
  static struct kmem_cache *signal_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134
135
  
  /* SLAB cache for sighand_struct structures (tsk->sighand) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
136
  struct kmem_cache *sighand_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
137
138
  
  /* SLAB cache for files_struct structures (tsk->files) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
139
  struct kmem_cache *files_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
140
141
  
  /* SLAB cache for fs_struct structures (tsk->fs) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
142
  struct kmem_cache *fs_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
144
  
  /* SLAB cache for vm_area_struct structures */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
145
  struct kmem_cache *vm_area_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146
147
  
  /* SLAB cache for mm_struct structures (tsk->mm) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
148
  static struct kmem_cache *mm_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149

c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
150
151
152
153
154
155
  /*
   * Adjust the NR_KERNEL_STACK counter of the zone holding @ti's page by
   * @account.  The callers in this file pass +1 when a stack is set up
   * and -1 when it is freed.
   */
  static void account_kernel_stack(struct thread_info *ti, int account)
  {
  	struct zone *stack_zone;
  
  	stack_zone = page_zone(virt_to_page(ti));
  	mod_zone_page_state(stack_zone, NR_KERNEL_STACK, account);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
157
  void free_task(struct task_struct *tsk)
  {
3e26c149c   Peter Zijlstra   mm: dirty balanci...
158
  	prop_local_destroy_single(&tsk->dirties);
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
159
  	account_kernel_stack(tsk->stack, -1);
f7e4217b0   Roman Zippel   rename thread_inf...
160
  	free_thread_info(tsk->stack);
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
161
  	rt_mutex_debug_task_free(tsk);
fb52607af   Frederic Weisbecker   tracing/function-...
162
  	ftrace_graph_exit_task(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163
164
165
  	free_task_struct(tsk);
  }
  EXPORT_SYMBOL(free_task);
ea6d290ca   Oleg Nesterov   signals: make tas...
166
167
  static inline void free_signal_struct(struct signal_struct *sig)
  {
97101eb41   Oleg Nesterov   exit: move taskst...
168
  	taskstats_tgid_free(sig);
1c5354de9   Mike Galbraith   sched: Move sched...
169
  	sched_autogroup_exit(sig);
ea6d290ca   Oleg Nesterov   signals: make tas...
170
171
172
173
174
  	kmem_cache_free(signal_cachep, sig);
  }
  
  static inline void put_signal_struct(struct signal_struct *sig)
  {
1c5354de9   Mike Galbraith   sched: Move sched...
175
  	if (atomic_dec_and_test(&sig->sigcnt))
ea6d290ca   Oleg Nesterov   signals: make tas...
176
177
  		free_signal_struct(sig);
  }
158d9ebd1   Andrew Morton   [PATCH] resurrect...
178
  void __put_task_struct(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
  {
270f722d4   Eugene Teo   Fix tsk->exit_sta...
180
  	WARN_ON(!tsk->exit_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
182
  	WARN_ON(atomic_read(&tsk->usage));
  	WARN_ON(tsk == current);
e0e817392   David Howells   CRED: Add some co...
183
  	exit_creds(tsk);
35df17c57   Shailabh Nagar   [PATCH] task dela...
184
  	delayacct_tsk_free(tsk);
ea6d290ca   Oleg Nesterov   signals: make tas...
185
  	put_signal_struct(tsk->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
186
187
188
189
  
  	if (!profile_handoff_task(tsk))
  		free_task(tsk);
  }
77c100c83   Rik van Riel   export pid symbol...
190
  EXPORT_SYMBOL_GPL(__put_task_struct);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191

2adee9b30   Suresh Siddha   x86: fpu xstate s...
192
193
194
195
196
197
198
  /*
   * Macro override instead of a weak attribute alias, to work around
   * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
   */
  #ifndef arch_task_cache_init
  #define arch_task_cache_init()
  #endif
61c4628b5   Suresh Siddha   x86, fpu: split F...
199

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
201
202
203
204
205
206
207
208
  void __init fork_init(unsigned long mempages)
  {
  #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
  #ifndef ARCH_MIN_TASKALIGN
  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
  #endif
  	/* create a slab on which task_structs can be allocated */
  	task_struct_cachep =
  		kmem_cache_create("task_struct", sizeof(struct task_struct),
2dff44052   Vegard Nossum   kmemcheck: add mm...
209
  			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
210
  #endif
61c4628b5   Suresh Siddha   x86, fpu: split F...
211
212
  	/* do the arch specific task caches init */
  	arch_task_cache_init();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
  	/*
  	 * The default maximum number of threads is set to a safe
  	 * value: the thread structures can take up at most half
  	 * of memory.
  	 */
  	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
  
  	/*
  	 * we need to allow at least 20 threads to boot a system
  	 */
  	if(max_threads < 20)
  		max_threads = 20;
  
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
  		init_task.signal->rlim[RLIMIT_NPROC];
  }
61c4628b5   Suresh Siddha   x86, fpu: split F...
231
232
233
234
235
236
  /*
   * Default (weak) task_struct duplication: a plain structure copy of
   * @src into @dst.  This version always returns 0; dup_task_struct()
   * treats a non-zero return as failure.
   */
  int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
  					       struct task_struct *src)
  {
  	*dst = *src;
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
237
238
239
240
  static struct task_struct *dup_task_struct(struct task_struct *orig)
  {
  	struct task_struct *tsk;
  	struct thread_info *ti;
7c9f8861e   Eric Sandeen   stackprotector: u...
241
  	unsigned long *stackend;
207205a2b   Eric Dumazet   kthread: NUMA awa...
242
  	int node = tsk_fork_get_node(orig);
3e26c149c   Peter Zijlstra   mm: dirty balanci...
243
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
245
  
  	prepare_to_copy(orig);
504f52b54   Eric Dumazet   mm: NUMA aware al...
246
  	tsk = alloc_task_struct_node(node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
247
248
  	if (!tsk)
  		return NULL;
b6a84016b   Eric Dumazet   mm: NUMA aware al...
249
  	ti = alloc_thread_info_node(tsk, node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
252
253
  	if (!ti) {
  		free_task_struct(tsk);
  		return NULL;
  	}
61c4628b5   Suresh Siddha   x86, fpu: split F...
254
255
256
   	err = arch_dup_task_struct(tsk, orig);
  	if (err)
  		goto out;
f7e4217b0   Roman Zippel   rename thread_inf...
257
  	tsk->stack = ti;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
258
259
  
  	err = prop_local_init_single(&tsk->dirties);
61c4628b5   Suresh Siddha   x86, fpu: split F...
260
261
  	if (err)
  		goto out;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
262

10ebffde3   Al Viro   [PATCH] m68k: int...
263
  	setup_thread_stack(tsk, orig);
8e7cac798   Avi Kivity   core: Fix user re...
264
  	clear_user_return_notifier(tsk);
f26f9aff6   Mike Galbraith   Sched: fix skip_c...
265
  	clear_tsk_need_resched(tsk);
7c9f8861e   Eric Sandeen   stackprotector: u...
266
267
  	stackend = end_of_stack(tsk);
  	*stackend = STACK_END_MAGIC;	/* for overflow detection */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268

0a4254058   Arjan van de Ven   [PATCH] Add the c...
269
270
271
  #ifdef CONFIG_CC_STACKPROTECTOR
  	tsk->stack_canary = get_random_int();
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272
273
  	/* One for us, one for whoever does the "release_task()" (usually parent) */
  	atomic_set(&tsk->usage,2);
4b5d37ac0   Giancarlo Formicuccia   [PATCH] Clear tas...
274
  	atomic_set(&tsk->fs_excl, 0);
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
275
  #ifdef CONFIG_BLK_DEV_IO_TRACE
2056a782f   Jens Axboe   [PATCH] Block que...
276
  	tsk->btrace_seq = 0;
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
277
  #endif
a0aa7f68a   Jens Axboe   [PATCH] Don't inh...
278
  	tsk->splice_pipe = NULL;
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
279
280
  
  	account_kernel_stack(ti, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
281
  	return tsk;
61c4628b5   Suresh Siddha   x86, fpu: split F...
282
283
284
285
286
  
  out:
  	free_thread_info(ti);
  	free_task_struct(tsk);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
287
288
289
  }
  
  #ifdef CONFIG_MMU
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
290
  static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
  {
297c5eee3   Linus Torvalds   mm: make the vma ...
292
  	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293
294
295
296
297
298
  	struct rb_node **rb_link, *rb_parent;
  	int retval;
  	unsigned long charge;
  	struct mempolicy *pol;
  
  	down_write(&oldmm->mmap_sem);
ec8c0446b   Ralf Baechle   [PATCH] Optimize ...
299
  	flush_cache_dup_mm(oldmm);
ad3394517   Ingo Molnar   [PATCH] lockdep: ...
300
301
302
303
  	/*
  	 * Not linked in yet - no deadlock potential:
  	 */
  	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
304

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
306
307
308
  	mm->locked_vm = 0;
  	mm->mmap = NULL;
  	mm->mmap_cache = NULL;
  	mm->free_area_cache = oldmm->mmap_base;
1363c3cd8   Wolfgang Wander   [PATCH] Avoiding ...
309
  	mm->cached_hole_size = ~0UL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
310
  	mm->map_count = 0;
948942445   Rusty Russell   cpumask: use mm_c...
311
  	cpumask_clear(mm_cpumask(mm));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
312
313
314
315
  	mm->mm_rb = RB_ROOT;
  	rb_link = &mm->mm_rb.rb_node;
  	rb_parent = NULL;
  	pprev = &mm->mmap;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
316
317
318
  	retval = ksm_fork(mm, oldmm);
  	if (retval)
  		goto out;
ba76149f4   Andrea Arcangeli   thp: khugepaged
319
320
321
  	retval = khugepaged_fork(mm, oldmm);
  	if (retval)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322

297c5eee3   Linus Torvalds   mm: make the vma ...
323
  	prev = NULL;
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
324
  	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
326
327
  		struct file *file;
  
  		if (mpnt->vm_flags & VM_DONTCOPY) {
3b6bfcdb1   Hugh Dickins   [PATCH] lower VM_...
328
329
  			long pages = vma_pages(mpnt);
  			mm->total_vm -= pages;
ab50b8ed8   Hugh Dickins   [PATCH] mm: vm_st...
330
  			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
3b6bfcdb1   Hugh Dickins   [PATCH] lower VM_...
331
  								-pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
332
333
334
335
336
337
338
339
340
  			continue;
  		}
  		charge = 0;
  		if (mpnt->vm_flags & VM_ACCOUNT) {
  			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
  			if (security_vm_enough_memory(len))
  				goto fail_nomem;
  			charge = len;
  		}
e94b17660   Christoph Lameter   [PATCH] slab: rem...
341
  		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
342
343
344
  		if (!tmp)
  			goto fail_nomem;
  		*tmp = *mpnt;
5beb49305   Rik van Riel   mm: change anon_v...
345
  		INIT_LIST_HEAD(&tmp->anon_vma_chain);
846a16bf0   Lee Schermerhorn   mempolicy: rename...
346
  		pol = mpol_dup(vma_policy(mpnt));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
347
348
349
350
  		retval = PTR_ERR(pol);
  		if (IS_ERR(pol))
  			goto fail_nomem_policy;
  		vma_set_policy(tmp, pol);
a247c3a97   Andrea Arcangeli   rmap: fix walk du...
351
  		tmp->vm_mm = mm;
5beb49305   Rik van Riel   mm: change anon_v...
352
353
  		if (anon_vma_fork(tmp, mpnt))
  			goto fail_nomem_anon_vma_fork;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
354
  		tmp->vm_flags &= ~VM_LOCKED;
297c5eee3   Linus Torvalds   mm: make the vma ...
355
  		tmp->vm_next = tmp->vm_prev = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
356
357
  		file = tmp->vm_file;
  		if (file) {
f3a43f3f6   Josef "Jeff" Sipek   [PATCH] kernel: c...
358
  			struct inode *inode = file->f_path.dentry->d_inode;
b88ed2059   Hugh Dickins   fix mapping_writa...
359
  			struct address_space *mapping = file->f_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
360
361
362
  			get_file(file);
  			if (tmp->vm_flags & VM_DENYWRITE)
  				atomic_dec(&inode->i_writecount);
3d48ae45e   Peter Zijlstra   mm: Convert i_mma...
363
  			mutex_lock(&mapping->i_mmap_mutex);
b88ed2059   Hugh Dickins   fix mapping_writa...
364
365
  			if (tmp->vm_flags & VM_SHARED)
  				mapping->i_mmap_writable++;
b88ed2059   Hugh Dickins   fix mapping_writa...
366
367
  			flush_dcache_mmap_lock(mapping);
  			/* insert tmp into the share list, just after mpnt */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
368
  			vma_prio_tree_add(tmp, mpnt);
b88ed2059   Hugh Dickins   fix mapping_writa...
369
  			flush_dcache_mmap_unlock(mapping);
3d48ae45e   Peter Zijlstra   mm: Convert i_mma...
370
  			mutex_unlock(&mapping->i_mmap_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
371
372
373
  		}
  
  		/*
a1e78772d   Mel Gorman   hugetlb: reserve ...
374
375
376
377
378
379
380
381
  		 * Clear hugetlb-related page reserves for children. This only
  		 * affects MAP_PRIVATE mappings. Faults generated by the child
  		 * are not guaranteed to succeed, even if read-only
  		 */
  		if (is_vm_hugetlb_page(tmp))
  			reset_vma_resv_huge_pages(tmp);
  
  		/*
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
382
  		 * Link in the new vma and copy the page table entries.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
383
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
384
385
  		*pprev = tmp;
  		pprev = &tmp->vm_next;
297c5eee3   Linus Torvalds   mm: make the vma ...
386
387
  		tmp->vm_prev = prev;
  		prev = tmp;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
388
389
390
391
392
393
  
  		__vma_link_rb(mm, tmp, rb_link, rb_parent);
  		rb_link = &tmp->vm_rb.rb_right;
  		rb_parent = &tmp->vm_rb;
  
  		mm->map_count++;
0b0db14c5   Hugh Dickins   [PATCH] unpaged: ...
394
  		retval = copy_page_range(mm, oldmm, mpnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
395
396
397
398
399
400
401
  
  		if (tmp->vm_ops && tmp->vm_ops->open)
  			tmp->vm_ops->open(tmp);
  
  		if (retval)
  			goto out;
  	}
d6dd61c83   Jeremy Fitzhardinge   [PATCH] x86: PARA...
402
403
  	/* a new mm has just been created */
  	arch_dup_mmap(oldmm, mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404
  	retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
405
  out:
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
406
  	up_write(&mm->mmap_sem);
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
407
  	flush_tlb_mm(oldmm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408
409
  	up_write(&oldmm->mmap_sem);
  	return retval;
5beb49305   Rik van Riel   mm: change anon_v...
410
411
  fail_nomem_anon_vma_fork:
  	mpol_put(pol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
  fail_nomem_policy:
  	kmem_cache_free(vm_area_cachep, tmp);
  fail_nomem:
  	retval = -ENOMEM;
  	vm_unacct_memory(charge);
  	goto out;
  }
  
  /*
   * Obtain a page global directory for @mm from pgd_alloc() and store it
   * in mm->pgd.  Returns 0 on success, or -ENOMEM when pgd_alloc() gave
   * back NULL.
   */
  static inline int mm_alloc_pgd(struct mm_struct * mm)
  {
  	mm->pgd = pgd_alloc(mm);
  
  	return unlikely(!mm->pgd) ? -ENOMEM : 0;
  }
  
  static inline void mm_free_pgd(struct mm_struct * mm)
  {
5e5419734   Benjamin Herrenschmidt   add mm argument t...
430
  	pgd_free(mm, mm->pgd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
431
432
433
434
435
436
  }
  #else
  #define dup_mmap(mm, oldmm)	(0)
  #define mm_alloc_pgd(mm)	(0)
  #define mm_free_pgd(mm)
  #endif /* CONFIG_MMU */
23ff44402   Daniel Walker   whitespace fixes:...
437
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
438

e94b17660   Christoph Lameter   [PATCH] slab: rem...
439
  #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440
  #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
4cb0e11b1   Hidehiro Kawai   coredump_filter: ...
441
442
443
444
445
446
447
448
449
450
451
  static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
  
  /*
   * Handler registered for the "coredump_filter=" boot parameter.
   * Parses @s with simple_strtoul() (base argument 0), shifts the value by
   * MMF_DUMP_FILTER_SHIFT, masks it with MMF_DUMP_FILTER_MASK and stores
   * the result in default_dump_filter.  Always returns 1.
   */
  static int __init coredump_filter_setup(char *s)
  {
  	unsigned long filter_bits = simple_strtoul(s, NULL, 0);
  
  	filter_bits <<= MMF_DUMP_FILTER_SHIFT;
  	default_dump_filter = filter_bits & MMF_DUMP_FILTER_MASK;
  
  	return 1;
  }
  
  __setup("coredump_filter=", coredump_filter_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
  #include <linux/init_task.h>
858f09930   Alexey Dobriyan   aio: ifdef fields...
453
454
455
456
457
458
459
  /*
   * Initialise the AIO fields of @mm: mm->ioctx_list and mm->ioctx_lock.
   * The body is empty unless CONFIG_AIO is defined.
   */
  static void mm_init_aio(struct mm_struct *mm)
  {
  #ifdef CONFIG_AIO
  	INIT_HLIST_HEAD(&mm->ioctx_list);
  	spin_lock_init(&mm->ioctx_lock);
  #endif
  }
78fb74669   Pavel Emelianov   Memory controller...
460
  static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
462
463
464
465
  {
  	atomic_set(&mm->mm_users, 1);
  	atomic_set(&mm->mm_count, 1);
  	init_rwsem(&mm->mmap_sem);
  	INIT_LIST_HEAD(&mm->mmlist);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
466
467
  	mm->flags = (current->mm) ?
  		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
999d9fc16   Oleg Nesterov   coredump: move mm...
468
  	mm->core_state = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
469
  	mm->nr_ptes = 0;
d559db086   KAMEZAWA Hiroyuki   mm: clean up mm_c...
470
  	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
  	spin_lock_init(&mm->page_table_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
472
  	mm->free_area_cache = TASK_UNMAPPED_BASE;
1363c3cd8   Wolfgang Wander   [PATCH] Avoiding ...
473
  	mm->cached_hole_size = ~0UL;
858f09930   Alexey Dobriyan   aio: ifdef fields...
474
  	mm_init_aio(mm);
cf475ad28   Balbir Singh   cgroups: add an o...
475
  	mm_init_owner(mm, p);
3d5992d2a   Ying Han   oom: add per-mm o...
476
  	atomic_set(&mm->oom_disable_count, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477
478
479
  
  	if (likely(!mm_alloc_pgd(mm))) {
  		mm->def_flags = 0;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
480
  		mmu_notifier_mm_init(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
481
482
  		return mm;
  	}
78fb74669   Pavel Emelianov   Memory controller...
483

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
484
485
486
487
488
489
490
491
492
493
494
495
  	free_mm(mm);
  	return NULL;
  }
  
  /*
   * Allocate and initialize an mm_struct.
   */
  struct mm_struct * mm_alloc(void)
  {
  	struct mm_struct * mm;
  
  	mm = allocate_mm();
de03c72cf   KOSAKI Motohiro   mm: convert mm->c...
496
497
498
499
  	if (!mm)
  		return NULL;
  
  	memset(mm, 0, sizeof(*mm));
6345d24da   Linus Torvalds   mm: Fix boot cras...
500
501
  	mm_init_cpumask(mm);
  	return mm_init(mm, current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
502
503
504
505
506
507
508
  }
  
  /*
   * Called when the last reference to the mm
   * is dropped: either by a lazy thread or by
   * mmput. Free the page directory and the mm.
   */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
509
  void __mmdrop(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
511
512
513
  {
  	BUG_ON(mm == &init_mm);
  	mm_free_pgd(mm);
  	destroy_context(mm);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
514
  	mmu_notifier_mm_destroy(mm);
e7a00c45f   Andrea Arcangeli   thp: add pmd_huge...
515
516
517
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  	VM_BUG_ON(mm->pmd_huge_pte);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
518
519
  	free_mm(mm);
  }
6d4e4c4fc   Avi Kivity   KVM: Disallow for...
520
  EXPORT_SYMBOL_GPL(__mmdrop);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
521
522
523
524
525
526
  
  /*
   * Decrement the use count and release all resources for an mm.
   */
  void mmput(struct mm_struct *mm)
  {
0ae26f1b3   Andrew Morton   [PATCH] mmput() m...
527
  	might_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
528
529
  	if (atomic_dec_and_test(&mm->mm_users)) {
  		exit_aio(mm);
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
530
  		ksm_exit(mm);
ba76149f4   Andrea Arcangeli   thp: khugepaged
531
  		khugepaged_exit(mm); /* must run before exit_mmap */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
532
  		exit_mmap(mm);
925d1c401   Matt Helsley   procfs task exe s...
533
  		set_mm_exe_file(mm, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
534
535
536
537
538
539
  		if (!list_empty(&mm->mmlist)) {
  			spin_lock(&mmlist_lock);
  			list_del(&mm->mmlist);
  			spin_unlock(&mmlist_lock);
  		}
  		put_swap_token(mm);
801460d0c   Hiroshi Shimamoto   task_struct clean...
540
541
  		if (mm->binfmt)
  			module_put(mm->binfmt->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542
543
544
545
  		mmdrop(mm);
  	}
  }
  EXPORT_SYMBOL_GPL(mmput);
386460138   Jiri Slaby   mm: extract exe_f...
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
  /*
   * We added or removed a vma mapping the executable. The vmas are only mapped
   * during exec and are not mapped with the mmap system call.
   * Callers must hold down_write() on the mm's mmap_sem for these
   */
  void added_exe_file_vma(struct mm_struct *mm)
  {
  	/* One more vma mapping the executable is now accounted for. */
  	mm->num_exe_file_vmas += 1;
  }
  
  /*
   * Account for the removal of one vma mapping the executable.  When the
   * count reaches zero and an exe_file is recorded, the reference is
   * dropped with fput() and the pointer is cleared.
   */
  void removed_exe_file_vma(struct mm_struct *mm)
  {
  	mm->num_exe_file_vmas--;
  
  	/* Nothing to release while vmas remain or no file is recorded. */
  	if (mm->num_exe_file_vmas != 0 || !mm->exe_file)
  		return;
  
  	fput(mm->exe_file);
  	mm->exe_file = NULL;
  }
  
  /*
   * Make @new_exe_file (may be NULL) the exe_file of @mm.  A reference is
   * taken on the new file first, then the reference on the previous file
   * (if any) is dropped, and num_exe_file_vmas is reset to 0.
   */
  void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  {
  	struct file *old_exe_file = mm->exe_file;
  
  	if (new_exe_file)
  		get_file(new_exe_file);
  	if (old_exe_file)
  		fput(old_exe_file);
  
  	mm->exe_file = new_exe_file;
  	mm->num_exe_file_vmas = 0;
  }
  
  struct file *get_mm_exe_file(struct mm_struct *mm)
  {
  	struct file *exe_file;
  
  	/* We need mmap_sem to protect against races with removal of
  	 * VM_EXECUTABLE vmas */
  	down_read(&mm->mmap_sem);
  	exe_file = mm->exe_file;
  	if (exe_file)
  		get_file(exe_file);
  	up_read(&mm->mmap_sem);
  	return exe_file;
  }
  
  /*
   * Give @newmm its own reference to @oldmm's exe_file (NULL if @oldmm has
   * none), obtained through get_mm_exe_file().
   */
  static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
  {
  	struct file *exe_file = get_mm_exe_file(oldmm);
  
  	/*
  	 * Per the original note, the pointer can be written without extra
  	 * locking because this is called during fork, when the task is
  	 * not yet in /proc.
  	 */
  	newmm->exe_file = exe_file;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
596
597
598
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
599
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
600
601
602
603
604
605
606
607
608
609
610
611
   * this kernel workthread has transiently adopted a user mm with use_mm,
   * to do its AIO) is not set and if so returns a reference to it, after
   * bumping up the use count.  User must release the mm via mmput()
   * after use.  Typically used by /proc and ptrace.
   */
  struct mm_struct *get_task_mm(struct task_struct *task)
  {
  	struct mm_struct *mm;
  
  	task_lock(task);
  	mm = task->mm;
  	if (mm) {
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
612
  		if (task->flags & PF_KTHREAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
  			mm = NULL;
  		else
  			atomic_inc(&mm->mm_users);
  	}
  	task_unlock(task);
  	return mm;
  }
  EXPORT_SYMBOL_GPL(get_task_mm);
  
  /* Please note the differences between mmput and mm_release.
   * mmput is called whenever we stop holding onto a mm_struct,
   * whether on an error path or on success.
   *
   * mm_release is called after a mm_struct has been removed
   * from the current process.
   *
   * This difference is important for error handling, when we
   * only half set up a mm_struct for a new process and need to restore
   * the old one.  Because we mmput the new mm_struct before
   * restoring the old one. . .
   * Eric Biederman 10 January 1998
   */
  void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  {
  	struct completion *vfork_done = tsk->vfork_done;
8141c7f3e   Linus Torvalds   Move "exit_robust...
638
639
  	/* Get rid of any futexes when releasing the mm */
  #ifdef CONFIG_FUTEX
fc6b177de   Peter Zijlstra   futex: Nullify ro...
640
  	if (unlikely(tsk->robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
641
  		exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
642
643
  		tsk->robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
644
  #ifdef CONFIG_COMPAT
fc6b177de   Peter Zijlstra   futex: Nullify ro...
645
  	if (unlikely(tsk->compat_robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
646
  		compat_exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
647
648
  		tsk->compat_robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
649
  #endif
322a2c100   Thomas Gleixner   futex: Move exit_...
650
651
  	if (unlikely(!list_empty(&tsk->pi_state_list)))
  		exit_pi_state_list(tsk);
8141c7f3e   Linus Torvalds   Move "exit_robust...
652
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
653
654
655
656
657
658
659
660
  	/* Get rid of any cached register state */
  	deactivate_mm(tsk, mm);
  
  	/* notify parent sleeping on vfork() */
  	if (vfork_done) {
  		tsk->vfork_done = NULL;
  		complete(vfork_done);
  	}
fec1d0115   Roland McGrath   [PATCH] Disable C...
661
662
663
664
665
666
667
  
  	/*
  	 * If we're exiting normally, clear a user-space tid field if
  	 * requested.  We leave this alone when dying by signal, to leave
  	 * the value intact in a core dump, and to save the unnecessary
  	 * trouble otherwise.  Userland only wants this done for a sys_exit.
  	 */
9c8a8228d   Eric Dumazet   execve: must clea...
668
669
670
671
672
673
674
675
676
677
678
  	if (tsk->clear_child_tid) {
  		if (!(tsk->flags & PF_SIGNALED) &&
  		    atomic_read(&mm->mm_users) > 1) {
  			/*
  			 * We don't check the error code - if userspace has
  			 * not set up a proper pointer then tough luck.
  			 */
  			put_user(0, tsk->clear_child_tid);
  			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
  					1, NULL, NULL, 0);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
679
  		tsk->clear_child_tid = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
680
681
  	}
  }
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
682
683
684
685
  /*
   * Allocate a new mm structure and copy contents from the
   * mm structure of the passed in task structure.
   */
402b08622   Carsten Otte   s390: KVM prepara...
686
  struct mm_struct *dup_mm(struct task_struct *tsk)
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
687
688
689
690
691
692
693
694
695
696
697
698
  {
  	struct mm_struct *mm, *oldmm = current->mm;
  	int err;
  
  	if (!oldmm)
  		return NULL;
  
  	mm = allocate_mm();
  	if (!mm)
  		goto fail_nomem;
  
  	memcpy(mm, oldmm, sizeof(*mm));
6345d24da   Linus Torvalds   mm: Fix boot cras...
699
  	mm_init_cpumask(mm);
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
700

7602bdf2f   Ashwin Chaugule   [PATCH] new schem...
701
702
703
  	/* Initializing for Swap token stuff */
  	mm->token_priority = 0;
  	mm->last_interval = 0;
e7a00c45f   Andrea Arcangeli   thp: add pmd_huge...
704
705
706
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  	mm->pmd_huge_pte = NULL;
  #endif
78fb74669   Pavel Emelianov   Memory controller...
707
  	if (!mm_init(mm, tsk))
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
708
709
710
711
  		goto fail_nomem;
  
  	if (init_new_context(tsk, mm))
  		goto fail_nocontext;
925d1c401   Matt Helsley   procfs task exe s...
712
  	dup_mm_exe_file(oldmm, mm);
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
713
714
715
716
717
718
  	err = dup_mmap(mm, oldmm);
  	if (err)
  		goto free_pt;
  
  	mm->hiwater_rss = get_mm_rss(mm);
  	mm->hiwater_vm = mm->total_vm;
801460d0c   Hiroshi Shimamoto   task_struct clean...
719
720
  	if (mm->binfmt && !try_module_get(mm->binfmt->module))
  		goto free_pt;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
721
722
723
  	return mm;
  
  free_pt:
801460d0c   Hiroshi Shimamoto   task_struct clean...
724
725
  	/* don't put binfmt in mmput, we haven't got module yet */
  	mm->binfmt = NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
726
727
728
729
730
731
732
733
734
735
736
737
738
739
  	mmput(mm);
  
  fail_nomem:
  	return NULL;
  
  fail_nocontext:
  	/*
  	 * If init_new_context() failed, we cannot use mmput() to free the mm
  	 * because it calls destroy_context()
  	 */
  	mm_free_pgd(mm);
  	free_mm(mm);
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
740
741
742
743
744
745
746
  static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
  {
  	struct mm_struct * mm, *oldmm;
  	int retval;
  
  	tsk->min_flt = tsk->maj_flt = 0;
  	tsk->nvcsw = tsk->nivcsw = 0;
17406b82d   Mandeep Singh Baines   softlockup: remov...
747
748
749
  #ifdef CONFIG_DETECT_HUNG_TASK
  	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
  
  	tsk->mm = NULL;
  	tsk->active_mm = NULL;
  
  	/*
  	 * Are we cloning a kernel thread?
  	 *
  	 * We need to steal a active VM for that..
  	 */
  	oldmm = current->mm;
  	if (!oldmm)
  		return 0;
  
  	if (clone_flags & CLONE_VM) {
  		atomic_inc(&oldmm->mm_users);
  		mm = oldmm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
766
767
768
769
  		goto good_mm;
  	}
  
  	retval = -ENOMEM;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
770
  	mm = dup_mm(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
771
772
  	if (!mm)
  		goto fail_nomem;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
773
  good_mm:
7602bdf2f   Ashwin Chaugule   [PATCH] new schem...
774
775
776
  	/* Initializing for Swap token stuff */
  	mm->token_priority = 0;
  	mm->last_interval = 0;
3d5992d2a   Ying Han   oom: add per-mm o...
777
778
  	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
  		atomic_inc(&mm->oom_disable_count);
7602bdf2f   Ashwin Chaugule   [PATCH] new schem...
779

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
781
782
  	tsk->mm = mm;
  	tsk->active_mm = mm;
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
783
784
  fail_nomem:
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
785
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
786
  static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
787
  {
498052bba   Al Viro   New locking/refco...
788
  	struct fs_struct *fs = current->fs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
789
  	if (clone_flags & CLONE_FS) {
498052bba   Al Viro   New locking/refco...
790
  		/* tsk->fs is already what we want */
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
791
  		spin_lock(&fs->lock);
498052bba   Al Viro   New locking/refco...
792
  		if (fs->in_exec) {
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
793
  			spin_unlock(&fs->lock);
498052bba   Al Viro   New locking/refco...
794
795
796
  			return -EAGAIN;
  		}
  		fs->users++;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
797
  		spin_unlock(&fs->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
798
799
  		return 0;
  	}
498052bba   Al Viro   New locking/refco...
800
  	tsk->fs = copy_fs_struct(fs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
801
802
803
804
  	if (!tsk->fs)
  		return -ENOMEM;
  	return 0;
  }
a016f3389   JANAK DESAI   [PATCH] unshare s...
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
  /*
   * Set up the open-file table for @tsk: with CLONE_FILES, take another
   * reference on current's table; otherwise install a duplicate made by
   * dup_fd().
   *
   * Returns 0 on success (also when current has no file table at all),
   * or the error reported by dup_fd().
   */
  static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
  {
  	struct files_struct *parent_files = current->files;
  	struct files_struct *copy;
  	int err = 0;
  
  	/* A background process may not have any files. */
  	if (!parent_files)
  		return 0;
  
  	if (clone_flags & CLONE_FILES) {
  		atomic_inc(&parent_files->count);
  		return 0;
  	}
  
  	copy = dup_fd(parent_files, &err);
  	if (!copy)
  		return err;
  
  	tsk->files = copy;
  	return 0;
  }
fadad878c   Jens Axboe   kernel: add CLONE...
830
  /*
   * Set up the io_context for @tsk (CONFIG_BLOCK only).
   *
   * With CLONE_IO the child links to current's io_context.  Without it,
   * a new io_context is allocated only when current's ioprio is valid,
   * and that ioprio is copied into it.
   *
   * Returns 0 on success or when there is nothing to do, -ENOMEM if the
   * link or allocation fails.
   */
  static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
  {
  #ifdef CONFIG_BLOCK
  	struct io_context *parent_ioc = current->io_context;
  	struct io_context *child_ioc;
  
  	if (!parent_ioc)
  		return 0;
  
  	if (clone_flags & CLONE_IO) {
  		/* Share the io context with the parent. */
  		child_ioc = ioc_task_link(parent_ioc);
  		tsk->io_context = child_ioc;
  		if (unlikely(!child_ioc))
  			return -ENOMEM;
  		return 0;
  	}
  
  	if (!ioprio_valid(parent_ioc->ioprio))
  		return 0;
  
  	child_ioc = alloc_io_context(GFP_KERNEL, -1);
  	tsk->io_context = child_ioc;
  	if (unlikely(!child_ioc))
  		return -ENOMEM;
  
  	child_ioc->ioprio = parent_ioc->ioprio;
  #endif
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
853
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
854
855
  {
  	struct sighand_struct *sig;
60348802e   Zhaolei   fork.c: cleanup f...
856
  	if (clone_flags & CLONE_SIGHAND) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
857
858
859
860
  		atomic_inc(&current->sighand->count);
  		return 0;
  	}
  	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
e56d09031   Ingo Molnar   [PATCH] RCU signa...
861
  	rcu_assign_pointer(tsk->sighand, sig);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
862
863
  	if (!sig)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
864
865
866
867
  	atomic_set(&sig->count, 1);
  	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
  	return 0;
  }
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
868
  void __cleanup_sighand(struct sighand_struct *sighand)
c81addc9d   Oleg Nesterov   [PATCH] rename __...
869
  {
c81addc9d   Oleg Nesterov   [PATCH] rename __...
870
871
872
  	if (atomic_dec_and_test(&sighand->count))
  		kmem_cache_free(sighand_cachep, sighand);
  }
f06febc96   Frank Mayhar   timers: fix itime...
873
874
875
876
877
878
  
  /*
   * Initialize POSIX timer handling for a thread group.
   */
  static void posix_cpu_timers_init_group(struct signal_struct *sig)
  {
78d7d407b   Jiri Slaby   kernel core: use ...
879
  	unsigned long cpu_limit;
f06febc96   Frank Mayhar   timers: fix itime...
880
881
  	/* Thread group counters. */
  	thread_group_cputime_init(sig);
78d7d407b   Jiri Slaby   kernel core: use ...
882
883
884
  	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
  	if (cpu_limit != RLIM_INFINITY) {
  		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
6279a751f   Oleg Nesterov   posix-timers: fix...
885
886
  		sig->cputimer.running = 1;
  	}
f06febc96   Frank Mayhar   timers: fix itime...
887
888
889
890
891
  	/* The timer lists. */
  	INIT_LIST_HEAD(&sig->cpu_timers[0]);
  	INIT_LIST_HEAD(&sig->cpu_timers[1]);
  	INIT_LIST_HEAD(&sig->cpu_timers[2]);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
892
  static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
893
894
  {
  	struct signal_struct *sig;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
895

4ab6c0833   Oleg Nesterov   clone(): fix race...
896
  	if (clone_flags & CLONE_THREAD)
490dea45d   Peter Zijlstra   itimers: remove t...
897
  		return 0;
490dea45d   Peter Zijlstra   itimers: remove t...
898

a56704ef6   Veaceslav Falico   copy_signal() cle...
899
  	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
900
901
902
  	tsk->signal = sig;
  	if (!sig)
  		return -ENOMEM;
b3ac022cb   Oleg Nesterov   proc: turn signal...
903
  	sig->nr_threads = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
904
  	atomic_set(&sig->live, 1);
b3ac022cb   Oleg Nesterov   proc: turn signal...
905
  	atomic_set(&sig->sigcnt, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
906
  	init_waitqueue_head(&sig->wait_chldexit);
b3bfa0cba   Sukadev Bhattiprolu   signals: protect ...
907
908
  	if (clone_flags & CLONE_NEWPID)
  		sig->flags |= SIGNAL_UNKILLABLE;
db51aeccd   Oleg Nesterov   signals: microopt...
909
  	sig->curr_target = tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
910
911
  	init_sigpending(&sig->shared_pending);
  	INIT_LIST_HEAD(&sig->posix_timers);
c9cb2e3d7   Thomas Gleixner   [PATCH] hrtimers:...
912
  	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
913
  	sig->real_timer.function = it_real_fn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
914

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
915
916
917
  	task_lock(current->group_leader);
  	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
  	task_unlock(current->group_leader);
6279a751f   Oleg Nesterov   posix-timers: fix...
918
  	posix_cpu_timers_init_group(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
919
  	tty_audit_fork(sig);
5091faa44   Mike Galbraith   sched: Add 'autog...
920
  	sched_autogroup_fork(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
921

4714d1d32   Ben Blum   cgroups: read-wri...
922
923
924
  #ifdef CONFIG_CGROUPS
  	init_rwsem(&sig->threadgroup_fork_lock);
  #endif
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
925
  	sig->oom_adj = current->signal->oom_adj;
a63d83f42   David Rientjes   oom: badness heur...
926
  	sig->oom_score_adj = current->signal->oom_score_adj;
dabb16f63   Mandeep Singh Baines   oom: allow a non-...
927
  	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
928

9b1bf12d5   KOSAKI Motohiro   signals: move cre...
929
  	mutex_init(&sig->cred_guard_mutex);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
930
931
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
932
  static void copy_flags(unsigned long clone_flags, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
933
934
  {
  	unsigned long new_flags = p->flags;
21aa9af03   Tejun Heo   sched: add hooks ...
935
  	new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
936
  	new_flags |= PF_FORKNOEXEC;
09a05394f   Roland McGrath   tracehook: clone
937
  	new_flags |= PF_STARTING;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
938
  	p->flags = new_flags;
2e1318956   Rafael J. Wysocki   freezer: prevent ...
939
  	clear_freeze_flag(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
940
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
941
  /*
   * set_tid_address: record @tidptr as the calling task's
   * clear_child_tid pointer and return the caller's pid as reported by
   * task_pid_vnr().
   */
  SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
  {
  	struct task_struct *self = current;
  
  	self->clear_child_tid = tidptr;
  
  	return task_pid_vnr(self);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
946
  static void rt_mutex_init_task(struct task_struct *p)
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
947
  {
1d6154825   Thomas Gleixner   sched: Convert pi...
948
  	raw_spin_lock_init(&p->pi_lock);
e29e175b0   Zilvinas Valinskas   [PATCH] initialis...
949
  #ifdef CONFIG_RT_MUTEXES
1d6154825   Thomas Gleixner   sched: Convert pi...
950
  	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
951
  	p->pi_blocked_on = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
952
953
  #endif
  }
cf475ad28   Balbir Singh   cgroups: add an o...
954
955
956
957
958
959
  #ifdef CONFIG_MM_OWNER
  /* Record task @p as the owner of @mm. */
  void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  {
  	mm->owner = p;
  }
  #endif /* CONFIG_MM_OWNER */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
960
  /*
   * Reset the POSIX CPU timer state of a single task: zero all three
   * expiry values and empty the three cpu_timers lists.
   */
  static void posix_cpu_timers_init(struct task_struct *tsk)
  {
  	int i;
  
  	tsk->cputime_expires.prof_exp = cputime_zero;
  	tsk->cputime_expires.virt_exp = cputime_zero;
  	tsk->cputime_expires.sched_exp = 0;
  
  	for (i = 0; i < 3; i++)
  		INIT_LIST_HEAD(&tsk->cpu_timers[i]);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
974
975
976
977
978
979
980
   * This creates a new process as a copy of the old one,
   * but does not actually start it yet.
   *
   * It copies the registers, and all the appropriate
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
981
982
983
984
  static struct task_struct *copy_process(unsigned long clone_flags,
  					unsigned long stack_start,
  					struct pt_regs *regs,
  					unsigned long stack_size,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
985
  					int __user *child_tidptr,
09a05394f   Roland McGrath   tracehook: clone
986
987
  					struct pid *pid,
  					int trace)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
988
989
  {
  	int retval;
a24efe62d   Mariusz Kozlowski   kernel/fork.c: re...
990
  	struct task_struct *p;
b4f48b636   Paul Menage   Task Control Grou...
991
  	int cgroup_callbacks_done = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
  
  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Thread groups must share signals as well, and detached threads
  	 * can only be started up within the thread group.
  	 */
  	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Shared signal handlers imply shared VM. By way of the above,
  	 * thread groups also imply shared VM. Blocking this case allows
  	 * for various simplifications in other code.
  	 */
  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
  		return ERR_PTR(-EINVAL);
123be07b0   Sukadev Bhattiprolu   fork(): disable C...
1010
1011
1012
1013
1014
1015
1016
1017
1018
  	/*
  	 * Siblings of global init remain as zombies on exit since they are
  	 * not reaped by their parent (swapper). To solve this and to avoid
  	 * multi-rooted process trees, prevent global and container-inits
  	 * from creating siblings.
  	 */
  	if ((clone_flags & CLONE_PARENT) &&
  				current->signal->flags & SIGNAL_UNKILLABLE)
  		return ERR_PTR(-EINVAL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1019
1020
1021
1022
1023
1024
1025
1026
  	retval = security_task_create(clone_flags);
  	if (retval)
  		goto fork_out;
  
  	retval = -ENOMEM;
  	p = dup_task_struct(current);
  	if (!p)
  		goto fork_out;
f7e8b616e   Steven Rostedt   function-graph: m...
1027
  	ftrace_graph_init_task(p);
bea493a03   Peter Zijlstra   [PATCH] rt-mutex:...
1028
  	rt_mutex_init_task(p);
d12c1a379   Ingo Molnar   lockdep: fix kern...
1029
  #ifdef CONFIG_PROVE_LOCKING
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1030
1031
1032
  	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
  	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1033
  	retval = -EAGAIN;
3b11a1dec   David Howells   CRED: Differentia...
1034
  	if (atomic_read(&p->real_cred->user->processes) >=
78d7d407b   Jiri Slaby   kernel core: use ...
1035
  			task_rlimit(p, RLIMIT_NPROC)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1036
  		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
18b6e0414   Serge Hallyn   User namespaces: ...
1037
  		    p->real_cred->user != INIT_USER)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1038
1039
  			goto bad_fork_free;
  	}
f1752eec6   David Howells   CRED: Detach the ...
1040
1041
1042
  	retval = copy_creds(p, clone_flags);
  	if (retval < 0)
  		goto bad_fork_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1043
1044
1045
1046
1047
1048
  
  	/*
  	 * If multiple threads are within copy_process(), then this check
  	 * triggers too late. This doesn't hurt, the check is only there
  	 * to stop root fork bombs.
  	 */
04ec93fe9   Li Zefan   fork.c: fix NULL ...
1049
  	retval = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
1051
  	if (nr_threads >= max_threads)
  		goto bad_fork_cleanup_count;
a1261f546   Al Viro   [PATCH] m68k: int...
1052
  	if (!try_module_get(task_thread_info(p)->exec_domain->module))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1053
  		goto bad_fork_cleanup_count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1054
  	p->did_exec = 0;
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
1055
  	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1056
  	copy_flags(clone_flags, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
1058
  	INIT_LIST_HEAD(&p->children);
  	INIT_LIST_HEAD(&p->sibling);
f41d911f8   Paul E. McKenney   rcu: Merge preemp...
1059
  	rcu_copy_process(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1060
1061
  	p->vfork_done = NULL;
  	spin_lock_init(&p->alloc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1063
1064
1065
1066
  	init_sigpending(&p->pending);
  
  	p->utime = cputime_zero;
  	p->stime = cputime_zero;
9ac52315d   Laurent Vivier   sched: guest CPU ...
1067
  	p->gtime = cputime_zero;
c66f08be7   Michael Neuling   Add scaled time t...
1068
1069
  	p->utimescaled = cputime_zero;
  	p->stimescaled = cputime_zero;
d99ca3b97   Hidetoshi Seto   sched, cputime: C...
1070
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING
73a2bcb0e   Peter Zijlstra   sched: keep utime...
1071
  	p->prev_utime = cputime_zero;
9301899be   Balbir Singh   sched: fix /proc/...
1072
  	p->prev_stime = cputime_zero;
d99ca3b97   Hidetoshi Seto   sched, cputime: C...
1073
  #endif
a3a2e76c7   KAMEZAWA Hiroyuki   mm: avoid null-po...
1074
1075
1076
  #if defined(SPLIT_RSS_COUNTING)
  	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
  #endif
172ba844a   Balbir Singh   sched: update del...
1077

6976675d9   Arjan van de Ven   hrtimer: create a...
1078
  	p->default_timer_slack_ns = current->timer_slack_ns;
5995477ab   Andrea Righi   task IO accountin...
1079
  	task_io_accounting_init(&p->ioac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1080
  	acct_clear_integrals(p);
f06febc96   Frank Mayhar   timers: fix itime...
1081
  	posix_cpu_timers_init(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1082

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1083
  	do_posix_clock_monotonic_gettime(&p->start_time);
924b42d5a   Tomas Janousek   Use boot based ti...
1084
1085
  	p->real_start_time = p->start_time;
  	monotonic_to_bootbased(&p->real_start_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1086
  	p->io_context = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1087
  	p->audit_context = NULL;
4714d1d32   Ben Blum   cgroups: read-wri...
1088
1089
  	if (clone_flags & CLONE_THREAD)
  		threadgroup_fork_read_lock(current);
b4f48b636   Paul Menage   Task Control Grou...
1090
  	cgroup_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1091
  #ifdef CONFIG_NUMA
846a16bf0   Lee Schermerhorn   mempolicy: rename...
1092
  	p->mempolicy = mpol_dup(p->mempolicy);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
1094
1095
   	if (IS_ERR(p->mempolicy)) {
   		retval = PTR_ERR(p->mempolicy);
   		p->mempolicy = NULL;
b4f48b636   Paul Menage   Task Control Grou...
1096
   		goto bad_fork_cleanup_cgroup;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
   	}
c61afb181   Paul Jackson   [PATCH] cpuset me...
1098
  	mpol_fix_fork_child_flag(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1099
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1100
1101
  #ifdef CONFIG_TRACE_IRQFLAGS
  	p->irq_events = 0;
b36e4758d   Russell King   [ARM] Fix kernel/...
1102
1103
1104
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
  	p->hardirqs_enabled = 1;
  #else
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1105
  	p->hardirqs_enabled = 0;
b36e4758d   Russell King   [ARM] Fix kernel/...
1106
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
  	p->hardirq_enable_ip = 0;
  	p->hardirq_enable_event = 0;
  	p->hardirq_disable_ip = _THIS_IP_;
  	p->hardirq_disable_event = 0;
  	p->softirqs_enabled = 1;
  	p->softirq_enable_ip = _THIS_IP_;
  	p->softirq_enable_event = 0;
  	p->softirq_disable_ip = 0;
  	p->softirq_disable_event = 0;
  	p->hardirq_context = 0;
  	p->softirq_context = 0;
  #endif
fbb9ce953   Ingo Molnar   [PATCH] lockdep: ...
1119
1120
1121
1122
1123
  #ifdef CONFIG_LOCKDEP
  	p->lockdep_depth = 0; /* no locks held yet */
  	p->curr_chain_key = 0;
  	p->lockdep_recursion = 0;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1124

408894ee4   Ingo Molnar   [PATCH] mutex sub...
1125
1126
1127
  #ifdef CONFIG_DEBUG_MUTEXES
  	p->blocked_on = NULL; /* not blocked yet */
  #endif
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
1128
1129
1130
1131
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  	p->memcg_batch.do_batch = 0;
  	p->memcg_batch.memcg = NULL;
  #endif
0f4814065   Markus Metzger   x86, ptrace: add ...
1132

3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1133
  	/* Perform scheduler related setup. Assign this task to a CPU. */
3e51e3edf   Samir Bellabes   sched: Remove unu...
1134
  	sched_fork(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1135

cdd6c482c   Ingo Molnar   perf: Do the big ...
1136
  	retval = perf_event_init_task(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1137
1138
  	if (retval)
  		goto bad_fork_cleanup_policy;
3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1139

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1140
  	if ((retval = audit_alloc(p)))
f1752eec6   David Howells   CRED: Detach the ...
1141
  		goto bad_fork_cleanup_policy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
  	/* copy all the process information */
  	if ((retval = copy_semundo(clone_flags, p)))
  		goto bad_fork_cleanup_audit;
  	if ((retval = copy_files(clone_flags, p)))
  		goto bad_fork_cleanup_semundo;
  	if ((retval = copy_fs(clone_flags, p)))
  		goto bad_fork_cleanup_files;
  	if ((retval = copy_sighand(clone_flags, p)))
  		goto bad_fork_cleanup_fs;
  	if ((retval = copy_signal(clone_flags, p)))
  		goto bad_fork_cleanup_sighand;
  	if ((retval = copy_mm(clone_flags, p)))
  		goto bad_fork_cleanup_signal;
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1155
  	if ((retval = copy_namespaces(clone_flags, p)))
d84f4f992   David Howells   CRED: Inaugurate ...
1156
  		goto bad_fork_cleanup_mm;
fadad878c   Jens Axboe   kernel: add CLONE...
1157
  	if ((retval = copy_io(clone_flags, p)))
fd0928df9   Jens Axboe   ioprio: move io p...
1158
  		goto bad_fork_cleanup_namespaces;
6f2c55b84   Alexey Dobriyan   Simplify copy_thr...
1159
  	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1160
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1161
  		goto bad_fork_cleanup_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1162

425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1163
1164
  	if (pid != &init_struct_pid) {
  		retval = -ENOMEM;
61bce0f13   Eric W. Biederman   pid: generalize t...
1165
  		pid = alloc_pid(p->nsproxy->pid_ns);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1166
  		if (!pid)
fd0928df9   Jens Axboe   ioprio: move io p...
1167
  			goto bad_fork_cleanup_io;
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1168
1169
1170
1171
1172
1173
  	}
  
  	p->pid = pid_nr(pid);
  	p->tgid = p->pid;
  	if (clone_flags & CLONE_THREAD)
  		p->tgid = current->tgid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1174
1175
1176
1177
1178
  	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
  	/*
  	 * Clear TID on mm_release()?
  	 */
  	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
73c101011   Jens Axboe   block: initial pa...
1179
1180
1181
  #ifdef CONFIG_BLOCK
  	p->plug = NULL;
  #endif
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1182
  #ifdef CONFIG_FUTEX
8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1183
1184
1185
1186
  	p->robust_list = NULL;
  #ifdef CONFIG_COMPAT
  	p->compat_robust_list = NULL;
  #endif
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1187
1188
  	INIT_LIST_HEAD(&p->pi_state_list);
  	p->pi_state_cache = NULL;
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1189
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190
  	/*
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1191
1192
1193
1194
1195
1196
  	 * sigaltstack should be cleared when sharing the same VM
  	 */
  	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
  		p->sas_ss_sp = p->sas_ss_size = 0;
  
  	/*
6580807da   Oleg Nesterov   ptrace: copy_proc...
1197
1198
  	 * Syscall tracing and stepping should be turned off in the
  	 * child regardless of CLONE_PTRACE.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1199
  	 */
6580807da   Oleg Nesterov   ptrace: copy_proc...
1200
  	user_disable_single_step(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1201
  	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
ed75e8d58   Laurent Vivier   [PATCH] UML Suppo...
1202
1203
1204
  #ifdef TIF_SYSCALL_EMU
  	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
  #endif
9745512ce   Arjan van de Ven   sched: latencytop...
1205
  	clear_all_latency_tracing(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1206

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1207
1208
1209
1210
  	/* ok, now we should be set up.. */
  	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
  	p->pdeath_signal = 0;
  	p->exit_state = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1211
1212
1213
1214
1215
  	/*
  	 * Ok, make it visible to the rest of the system.
  	 * We don't wake it up yet.
  	 */
  	p->group_leader = p;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1216
  	INIT_LIST_HEAD(&p->thread_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1217

b4f48b636   Paul Menage   Task Control Grou...
1218
1219
1220
1221
1222
  	/* Now that the task is set up, run cgroup callbacks if
  	 * necessary. We need to run them before the task is visible
  	 * on the tasklist. */
  	cgroup_fork_callbacks(p);
  	cgroup_callbacks_done = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1223
1224
  	/* Need tasklist lock for parent etc handling! */
  	write_lock_irq(&tasklist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1225
  	/* CLONE_PARENT re-uses the old parent */
2d5516cbb   Oleg Nesterov   copy_process: fix...
1226
  	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1227
  		p->real_parent = current->real_parent;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1228
1229
  		p->parent_exec_id = current->parent_exec_id;
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1230
  		p->real_parent = current;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1231
1232
  		p->parent_exec_id = current->self_exec_id;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1233

3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1234
  	spin_lock(&current->sighand->siglock);
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1235
1236
1237
1238
1239
1240
1241
1242
1243
  
  	/*
  	 * Process group and session signals need to be delivered to just the
  	 * parent before the fork or both the parent and the child after the
  	 * fork. Restart if a signal comes in before we add the new process to
  	 * its process group.
  	 * A fatal signal pending means that current will exit, so the new
  	 * thread can't slip out of an OOM kill (or normal SIGKILL).
   	 */
23ff44402   Daniel Walker   whitespace fixes:...
1244
  	recalc_sigpending();
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1245
1246
1247
1248
  	if (signal_pending(current)) {
  		spin_unlock(&current->sighand->siglock);
  		write_unlock_irq(&tasklist_lock);
  		retval = -ERESTARTNOINTR;
f7e8b616e   Steven Rostedt   function-graph: m...
1249
  		goto bad_fork_free_pid;
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1250
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1251
  	if (clone_flags & CLONE_THREAD) {
b3ac022cb   Oleg Nesterov   proc: turn signal...
1252
  		current->signal->nr_threads++;
4ab6c0833   Oleg Nesterov   clone(): fix race...
1253
  		atomic_inc(&current->signal->live);
b3ac022cb   Oleg Nesterov   proc: turn signal...
1254
  		atomic_inc(&current->signal->sigcnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1255
  		p->group_leader = current->group_leader;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1256
  		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1257
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1258
  	if (likely(p->pid)) {
09a05394f   Roland McGrath   tracehook: clone
1259
  		tracehook_finish_clone(p, clone_flags, trace);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1260
1261
  
  		if (thread_group_leader(p)) {
45a68628d   Eric W. Biederman   pid: remove the c...
1262
  			if (is_child_reaper(pid))
30e49c263   Pavel Emelyanov   pid namespaces: a...
1263
  				p->nsproxy->pid_ns->child_reaper = p;
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1264

fea9d1755   Oleg Nesterov   ITIMER_REAL: conv...
1265
  			p->signal->leader_pid = pid;
9c9f4ded9   Alan Cox   tty: Add a kref c...
1266
  			p->signal->tty = tty_kref_get(current->signal->tty);
5cd17569f   Eric W. Biederman   fix clone(CLONE_N...
1267
1268
  			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
  			attach_pid(p, PIDTYPE_SID, task_session(current));
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1269
  			list_add_tail(&p->sibling, &p->real_parent->children);
5e85d4abe   Eric W. Biederman   [PATCH] task: Mak...
1270
  			list_add_tail_rcu(&p->tasks, &init_task.tasks);
909ea9646   Christoph Lameter   core: Replace __g...
1271
  			__this_cpu_inc(process_counts);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1272
  		}
85868995d   Sukadev Bhattiprolu   Use struct pid pa...
1273
  		attach_pid(p, PIDTYPE_PID, pid);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1274
  		nr_threads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1275
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1276
  	total_forks++;
3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1277
  	spin_unlock(&current->sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1278
  	write_unlock_irq(&tasklist_lock);
c13cf856c   Andrew Morton   [PATCH] fork.c: p...
1279
  	proc_fork_connector(p);
817929ec2   Paul Menage   Task Control Grou...
1280
  	cgroup_post_fork(p);
4714d1d32   Ben Blum   cgroups: read-wri...
1281
1282
  	if (clone_flags & CLONE_THREAD)
  		threadgroup_fork_read_unlock(current);
cdd6c482c   Ingo Molnar   perf: Do the big ...
1283
  	perf_event_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284
  	return p;
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1285
1286
1287
  bad_fork_free_pid:
  	if (pid != &init_struct_pid)
  		free_pid(pid);
fd0928df9   Jens Axboe   ioprio: move io p...
1288
  bad_fork_cleanup_io:
b69f22920   Louis Rilling   block: Fix io_con...
1289
1290
  	if (p->io_context)
  		exit_io_context(p);
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1291
  bad_fork_cleanup_namespaces:
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1292
  	exit_task_namespaces(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1293
  bad_fork_cleanup_mm:
3d5992d2a   Ying Han   oom: add per-mm o...
1294
1295
1296
1297
1298
  	if (p->mm) {
  		task_lock(p);
  		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
  			atomic_dec(&p->mm->oom_disable_count);
  		task_unlock(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1299
  		mmput(p->mm);
3d5992d2a   Ying Han   oom: add per-mm o...
1300
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
  bad_fork_cleanup_signal:
4ab6c0833   Oleg Nesterov   clone(): fix race...
1302
  	if (!(clone_flags & CLONE_THREAD))
1c5354de9   Mike Galbraith   sched: Move sched...
1303
  		free_signal_struct(p->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1304
  bad_fork_cleanup_sighand:
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1305
  	__cleanup_sighand(p->sighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1306
1307
1308
1309
1310
1311
1312
1313
  bad_fork_cleanup_fs:
  	exit_fs(p); /* blocking */
  bad_fork_cleanup_files:
  	exit_files(p); /* blocking */
  bad_fork_cleanup_semundo:
  	exit_sem(p);
  bad_fork_cleanup_audit:
  	audit_free(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1314
  bad_fork_cleanup_policy:
cdd6c482c   Ingo Molnar   perf: Do the big ...
1315
  	perf_event_free_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1316
  #ifdef CONFIG_NUMA
f0be3d32b   Lee Schermerhorn   mempolicy: rename...
1317
  	mpol_put(p->mempolicy);
b4f48b636   Paul Menage   Task Control Grou...
1318
  bad_fork_cleanup_cgroup:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1319
  #endif
4714d1d32   Ben Blum   cgroups: read-wri...
1320
1321
  	if (clone_flags & CLONE_THREAD)
  		threadgroup_fork_read_unlock(current);
b4f48b636   Paul Menage   Task Control Grou...
1322
  	cgroup_exit(p, cgroup_callbacks_done);
35df17c57   Shailabh Nagar   [PATCH] task dela...
1323
  	delayacct_tsk_free(p);
a1261f546   Al Viro   [PATCH] m68k: int...
1324
  	module_put(task_thread_info(p)->exec_domain->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1325
  bad_fork_cleanup_count:
d84f4f992   David Howells   CRED: Inaugurate ...
1326
  	atomic_dec(&p->cred->user->processes);
e0e817392   David Howells   CRED: Add some co...
1327
  	exit_creds(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1328
1329
  bad_fork_free:
  	free_task(p);
fe7d37d1f   Oleg Nesterov   [PATCH] copy_proc...
1330
1331
  fork_out:
  	return ERR_PTR(retval);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1332
  }
6b2fb3c65   Adrian Bunk   idle_regs() must ...
1333
  /*
   * Default idle_regs(): clear the caller-supplied pt_regs and hand the
   * same pointer back.  The definition is marked weak, so a non-weak
   * idle_regs() defined elsewhere takes precedence over this one.
   */
  noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1334
1335
1336
1337
  {
  	memset(regs, 0, sizeof(struct pt_regs));
  	return regs;
  }
f106eee10   Oleg Nesterov   pids: fix fork_id...
1338
1339
1340
1341
1342
1343
1344
1345
1346
  static inline void init_idle_pids(struct pid_link *links)
  {
  	enum pid_type type;
  
  	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
  		INIT_HLIST_NODE(&links[type].node); /* not really needed */
  		links[type].pid = &init_struct_pid;
  	}
  }
9abcf40b1   Al Viro   [PATCH] fork_idle...
1347
  /*
   * Create the task that is handed to init_idle() for @cpu.
   *
   * The task is built by copy_process() with CLONE_VM, zeroed registers
   * from idle_regs() and &init_struct_pid as its pid.  Returns the new
   * task, or the ERR_PTR value from copy_process() unchanged on failure.
   */
  struct task_struct * __cpuinit fork_idle(int cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1348
  {
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
1349
  	struct task_struct *task;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1350
  	struct pt_regs regs;
30e49c263   Pavel Emelyanov   pid namespaces: a...
1351
  	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
09a05394f   Roland McGrath   tracehook: clone
1352
  			    &init_struct_pid, 0);
f106eee10   Oleg Nesterov   pids: fix fork_id...
1353
1354
  	/* Only a successfully created task gets its pid links and idle setup. */
  	if (!IS_ERR(task)) {
  		init_idle_pids(task->pids);
753ca4f31   Akinobu Mita   [PATCH] fix copy_...
1355
  		init_idle(task, cpu);
f106eee10   Oleg Nesterov   pids: fix fork_id...
1356
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1357

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1358
1359
  	return task;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
  /*
   *  Ok, this is the main fork-routine.
   *
   * It copies the process, and if successful kick-starts
   * it and waits for it to finish using the VM if required.
   *
   * Returns the new task's pid as reported by task_pid_vnr() on success.
   * On failure returns a negative errno: -EINVAL or -EPERM from the
   * CLONE_NEWUSER checks below, otherwise the error carried by the
   * ERR_PTR that copy_process() returned.
   */
  long do_fork(unsigned long clone_flags,
  	      unsigned long stack_start,
  	      struct pt_regs *regs,
  	      unsigned long stack_size,
  	      int __user *parent_tidptr,
  	      int __user *child_tidptr)
  {
  	struct task_struct *p;
  	int trace = 0;
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1375
  	long nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1376

bdff746a3   Andrew Morton   clone: prepare to...
1377
  	/*
18b6e0414   Serge Hallyn   User namespaces: ...
1378
1379
1380
1381
1382
1383
1384
1385
1386
  	 * Do some preliminary argument and permissions checking before we
  	 * actually start allocating stuff
  	 */
  	if (clone_flags & CLONE_NEWUSER) {
  		if (clone_flags & CLONE_THREAD)
  			return -EINVAL;
  		/* hopefully this check will go away when userns support is
  		 * complete
  		 */
7657d9049   Serge E. Hallyn   user namespaces: ...
1387
1388
  		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
  				!capable(CAP_SETGID))
18b6e0414   Serge Hallyn   User namespaces: ...
1389
1390
1391
1392
  			return -EPERM;
  	}
  
  	/*
09a05394f   Roland McGrath   tracehook: clone
1393
1394
1395
1396
  	 * When called from kernel_thread, don't do user tracing stuff.
  	 */
  	if (likely(user_mode(regs)))
  		trace = tracehook_prepare_clone(clone_flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1397

a6f5e0637   Sukadev Bhattiprolu   pid namespaces: m...
1398
  	p = copy_process(clone_flags, stack_start, regs, stack_size,
09a05394f   Roland McGrath   tracehook: clone
1399
  			 child_tidptr, NULL, trace);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400
1401
1402
1403
1404
1405
  	/*
  	 * Do this prior to waking up the new thread - the thread pointer
  	 * might get invalid after that point, if the thread exits quickly.
  	 */
  	if (!IS_ERR(p)) {
  		/*
  		 * On-stack completion used only for CLONE_VFORK; this
  		 * function waits on it below before returning.
  		 */
  		struct completion vfork;
0a16b6075   Mathieu Desnoyers   tracing, sched: L...
1406
  		trace_sched_process_fork(current, p);
6c5f3e7b4   Pavel Emelyanov   Pidns: make full ...
1407
  		/* nr is captured now and is what the success path returns. */
  		nr = task_pid_vnr(p);
30e49c263   Pavel Emelyanov   pid namespaces: a...
1408
1409
1410
  
  		/* The put_user() result is not checked here. */
  		if (clone_flags & CLONE_PARENT_SETTID)
  			put_user(nr, parent_tidptr);
a6f5e0637   Sukadev Bhattiprolu   pid namespaces: m...
1411

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1412
1413
1414
1415
  		if (clone_flags & CLONE_VFORK) {
  			p->vfork_done = &vfork;
  			init_completion(&vfork);
  		}
a64e64944   Al Viro   [PATCH] return re...
1416
  		audit_finish_fork(p);
087eb4370   Oleg Nesterov   ptrace: tracehook...
1417
  		tracehook_report_clone(regs, clone_flags, nr, p);
09a05394f   Roland McGrath   tracehook: clone
1418
1419
1420
1421
1422
1423
1424
1425
  
  		/*
  		 * We set PF_STARTING at creation in case tracing wants to
  		 * use this to distinguish a fully live task from one that
  		 * hasn't gotten to tracehook_report_clone() yet.  Now we
  		 * clear it and set the child going.
  		 */
  		p->flags &= ~PF_STARTING;
3e51e3edf   Samir Bellabes   sched: Remove unu...
1426
  		wake_up_new_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1427

09a05394f   Roland McGrath   tracehook: clone
1428
1429
  		tracehook_report_clone_complete(trace, regs,
  						clone_flags, nr, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1430
  		/*
  		 * Block until the completion installed in p->vfork_done
  		 * above is completed (the completing side is not in this
  		 * function).
  		 */
  		if (clone_flags & CLONE_VFORK) {
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
1431
  			freezer_do_not_count();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1432
  			wait_for_completion(&vfork);
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
1433
  			freezer_count();
daded34be   Roland McGrath   tracehook: vfork-...
1434
  			tracehook_report_vfork_done(p, nr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1435
1436
  		}
  	} else {
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1437
  		/* copy_process() failed: return its error code. */
  		nr = PTR_ERR(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1438
  	}
92476d7fc   Eric W. Biederman   [PATCH] pidhash: ...
1439
  	return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1440
  }
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
1441
1442
1443
  #ifndef ARCH_MIN_MMSTRUCT_ALIGN
  #define ARCH_MIN_MMSTRUCT_ALIGN 0
  #endif
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
1444
  /*
   * Constructor handed to kmem_cache_create() for "sighand_cache" in
   * proc_caches_init().  @data is the sighand_struct being constructed;
   * only its siglock and signalfd_wqh are initialized here.
   */
  static void sighand_ctor(void *data)
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1445
1446
  {
  	struct sighand_struct *sighand = data;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
1447
  	spin_lock_init(&sighand->siglock);
b8fceee17   Davide Libenzi   signalfd simplifi...
1448
  	init_waitqueue_head(&sighand->signalfd_wqh);
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1449
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1450
1451
1452
1453
  /*
   * Create the caches for sighand_struct, signal_struct, files_struct,
   * fs_struct, mm_struct and vm_area_struct, then call mmap_init().
   * Every cache is created with SLAB_PANIC and none of the returned
   * pointers is checked here.
   */
  void __init proc_caches_init(void)
  {
  	/*
  	 * sighand_cache is the only cache created here with a constructor
  	 * (sighand_ctor) and with SLAB_DESTROY_BY_RCU.
  	 */
  	sighand_cachep = kmem_cache_create("sighand_cache",
  			sizeof(struct sighand_struct), 0,
2dff44052   Vegard Nossum   kmemcheck: add mm...
1454
1455
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
  			SLAB_NOTRACK, sighand_ctor);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1456
1457
  	signal_cachep = kmem_cache_create("signal_cache",
  			sizeof(struct signal_struct), 0,
2dff44052   Vegard Nossum   kmemcheck: add mm...
1458
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
20c2df83d   Paul Mundt   mm: Remove slab d...
1459
  	files_cachep = kmem_cache_create("files_cache",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1460
  			sizeof(struct files_struct), 0,
2dff44052   Vegard Nossum   kmemcheck: add mm...
1461
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
20c2df83d   Paul Mundt   mm: Remove slab d...
1462
  	fs_cachep = kmem_cache_create("fs_cache",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1463
  			sizeof(struct fs_struct), 0,
2dff44052   Vegard Nossum   kmemcheck: add mm...
1464
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
6345d24da   Linus Torvalds   mm: Fix boot cras...
1465
1466
1467
1468
1469
1470
1471
  	/*
  	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
  	 * whole struct cpumask for the OFFSTACK case. We could change
  	 * this to *only* allocate as much of it as required by the
  	 * maximum number of CPU's we can ever have.  The cpumask_allocation
  	 * is at the end of the structure, exactly for that reason.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1472
  	mm_cachep = kmem_cache_create("mm_struct",
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
1473
  			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
2dff44052   Vegard Nossum   kmemcheck: add mm...
1474
  			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
33e5d7697   David Howells   nommu: fix a numb...
1475
  	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
8feae1311   David Howells   NOMMU: Make VMAs ...
1476
  	mmap_init();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1477
  }
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1478

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1479
  /*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1480
   * Check constraints on flags passed to the unshare system call.
   *
   * Returns 0 when the flags are acceptable and -EINVAL otherwise, i.e.
   * when a bit outside the supported set is present, or when CLONE_THREAD,
   * CLONE_SIGHAND or CLONE_VM is requested while current->mm has more
   * than one user.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1481
   */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1482
  static int check_unshare_flags(unsigned long unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1483
  {
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1484
1485
1486
1487
  	/* Any bit outside this supported set is rejected outright. */
  	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
  				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
  				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
  		return -EINVAL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1488
  	/*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1489
1490
1491
  	 * Not implemented, but pretend it works if there is nothing to
  	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
  	 * needs to unshare vm.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1492
  	 */
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1493
1494
1495
1496
1497
  	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
  		/* FIXME: get_task_mm() increments ->mm_users */
  		if (atomic_read(&current->mm->mm_users) > 1)
  			return -EINVAL;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1498
1499
1500
1501
1502
  
  	return 0;
  }
  
  /*
99d1419d9   JANAK DESAI   [PATCH] unshare s...
1503
   * Unshare the filesystem structure if it is being shared
   *
   * Leaves *new_fsp untouched and returns 0 when CLONE_FS is not set in
   * @unshare_flags, when current has no fs_struct, or when fs->users is 1.
   * Otherwise stores the result of copy_fs_struct() in *new_fsp and
   * returns 0, or -ENOMEM if that result is NULL.  current->fs itself is
   * not changed here.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1504
1505
1506
1507
   */
  static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  {
  	struct fs_struct *fs = current->fs;
498052bba   Al Viro   New locking/refco...
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
  	if (!(unshare_flags & CLONE_FS) || !fs)
  		return 0;
  
  	/* don't need lock here; in the worst case we'll do useless copy */
  	if (fs->users == 1)
  		return 0;
  
  	*new_fsp = copy_fs_struct(fs);
  	if (!*new_fsp)
  		return -ENOMEM;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1518
1519
1520
1521
1522
  
  	return 0;
  }
  
  /*
a016f3389   JANAK DESAI   [PATCH] unshare s...
1523
   * Unshare file descriptor table if it is being shared
   *
   * A copy is made with dup_fd() only when CLONE_FILES is set in
   * @unshare_flags and current->files exists with a count above 1; the
   * copy is stored in *new_fdp.  Returns 0, or the error code dup_fd()
   * reported through its second argument when it returned NULL.
   * current->files itself is not changed here.
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1524
1525
1526
1527
   */
  static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  {
  	struct files_struct *fd = current->files;
a016f3389   JANAK DESAI   [PATCH] unshare s...
1528
  	int error = 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1529
1530
  
  	if ((unshare_flags & CLONE_FILES) &&
a016f3389   JANAK DESAI   [PATCH] unshare s...
1531
1532
1533
1534
1535
  	    (fd && atomic_read(&fd->count) > 1)) {
  		*new_fdp = dup_fd(fd, &error);
  		if (!*new_fdp)
  			return error;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1536
1537
1538
1539
1540
  
  	return 0;
  }
  
  /*
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1541
1542
1543
1544
1545
1546
1547
   * unshare allows a process to 'unshare' part of the process
   * context which was originally shared using clone.  copy_*
   * functions used by do_fork() cannot be used here directly
   * because they modify an inactive task_struct that is being
   * constructed. Here we are modifying the current, active,
   * task_struct.
   *
   * Returns 0 on success, or the error from whichever of
   * check_unshare_flags(), unshare_fs(), unshare_fd() or
   * unshare_nsproxy_namespaces() failed first.  The success path runs
   * through the same bad_unshare_* labels as the error paths, releasing
   * whatever new_fd and new_fs still point to at that time.
   */
6559eed8c   Heiko Carstens   [CVE-2009-0029] S...
1548
  SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1549
  {
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1550
  	struct fs_struct *fs, *new_fs = NULL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1551
  	struct files_struct *fd, *new_fd = NULL;
cf7b708c8   Pavel Emelyanov   Make access to ta...
1552
  	struct nsproxy *new_nsproxy = NULL;
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1553
  	int do_sysvsem = 0;
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1554
  	int err;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1555

9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1556
1557
  	err = check_unshare_flags(unshare_flags);
  	if (err)
06f9d4f94   Eric W. Biederman   [PATCH] unshare: ...
1558
  		goto bad_unshare_out;
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
1559
  	/*
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1560
1561
1562
1563
1564
  	 * If unsharing namespace, must also unshare filesystem information.
  	 */
  	if (unshare_flags & CLONE_NEWNS)
  		unshare_flags |= CLONE_FS;
  	/*
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
1565
1566
1567
1568
1569
  	 * CLONE_NEWIPC must also detach from the undolist: after switching
  	 * to a new ipc namespace, the semaphore arrays from the old
  	 * namespace are unreachable.
  	 */
  	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1570
  		do_sysvsem = 1;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1571
  	/*
  	 * Build the replacement objects first; current is only modified
  	 * further down, once all three helpers have succeeded.
  	 */
  	if ((err = unshare_fs(unshare_flags, &new_fs)))
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1572
  		goto bad_unshare_out;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1573
  	if ((err = unshare_fd(unshare_flags, &new_fd)))
9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1574
  		goto bad_unshare_cleanup_fs;
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1575
1576
  	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
  			new_fs)))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1577
  		goto bad_unshare_cleanup_fd;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1578

9bfb23fc4   Oleg Nesterov   sys_unshare: remo...
1579
  	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1580
1581
1582
1583
1584
1585
  		if (do_sysvsem) {
  			/*
  			 * CLONE_SYSVSEM is equivalent to sys_exit().
  			 */
  			exit_sem(current);
  		}
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1586

c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1587
  		if (new_nsproxy) {
cf7b708c8   Pavel Emelyanov   Make access to ta...
1588
1589
  			switch_task_namespaces(current, new_nsproxy);
  			/* Cleared so the put_nsproxy() below is skipped. */
  			new_nsproxy = NULL;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1590
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1591

cf7b708c8   Pavel Emelyanov   Make access to ta...
1592
  		task_lock(current);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1593
1594
  		if (new_fs) {
  			fs = current->fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1595
  			spin_lock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1596
  			current->fs = new_fs;
498052bba   Al Viro   New locking/refco...
1597
1598
1599
1600
  			/*
  			 * Drop our use of the old fs_struct.  new_fs is
  			 * reused as "what to free": the old struct if this
  			 * was its last user, otherwise nothing.
  			 */
  			if (--fs->users)
  				new_fs = NULL;
  			else
  				new_fs = fs;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1601
  			spin_unlock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1602
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1603
1604
1605
1606
1607
1608
1609
1610
  		/*
  		 * Swap in the new table; new_fd then names the old one so
  		 * that it is the one released at bad_unshare_cleanup_fd.
  		 */
  		if (new_fd) {
  			fd = current->files;
  			current->files = new_fd;
  			new_fd = fd;
  		}
  
  		task_unlock(current);
  	}
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1611
  	if (new_nsproxy)
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1612
  		put_nsproxy(new_nsproxy);
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1613

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1614
1615
1616
  bad_unshare_cleanup_fd:
  	if (new_fd)
  		put_files_struct(new_fd);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1617
1618
  bad_unshare_cleanup_fs:
  	if (new_fs)
498052bba   Al Viro   New locking/refco...
1619
  		free_fs_struct(new_fs);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1620

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1621
1622
1623
  bad_unshare_out:
  	return err;
  }
3b1253880   Al Viro   [PATCH] sanitize ...
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
  
  /*
   *	Helper to unshare the files of the current task.
   *	We don't want to expose copy_files internals to
   *	the exec layer of the kernel.
   *
   *	On return *displaced holds the files_struct that was replaced,
   *	or NULL when nothing was replaced (unshare_fd() made no copy or
   *	failed).  Returns 0, or the error from unshare_fd().
   */
  
  int unshare_files(struct files_struct **displaced)
  {
  	struct task_struct *task = current;
50704516f   Al Viro   Fix uninitialized...
1634
  	struct files_struct *copy = NULL;
3b1253880   Al Viro   [PATCH] sanitize ...
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
  	int error;
  
  	error = unshare_fd(CLONE_FILES, &copy);
  	/* copy stays NULL when unshare_fd() decided no copy was needed. */
  	if (error || !copy) {
  		*displaced = NULL;
  		return error;
  	}
  	/* Hand the old table to the caller, then install the copy. */
  	*displaced = task->files;
  	task_lock(task);
  	task->files = copy;
  	task_unlock(task);
  	return 0;
  }