Blame view

kernel/fork.c 41.3 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /*
   *  linux/kernel/fork.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   *  'fork.c' contains the help-routines for the 'fork' system call
   * (see also entry.S and others).
   * Fork is rather simple, once you get the hang of it, but the memory
   * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
  #include <linux/slab.h>
  #include <linux/init.h>
  #include <linux/unistd.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
18
  #include <linux/module.h>
  #include <linux/vmalloc.h>
  #include <linux/completion.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
  #include <linux/personality.h>
  #include <linux/mempolicy.h>
  #include <linux/sem.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
23
  #include <linux/fdtable.h>
da9cbc873   Jens Axboe   block: blkdev.h c...
24
  #include <linux/iocontext.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
  #include <linux/key.h>
  #include <linux/binfmts.h>
  #include <linux/mman.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
28
  #include <linux/mmu_notifier.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
29
  #include <linux/fs.h>
ab516013a   Serge E. Hallyn   [PATCH] namespace...
30
  #include <linux/nsproxy.h>
c59ede7b7   Randy.Dunlap   [PATCH] move capa...
31
  #include <linux/capability.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  #include <linux/cpu.h>
b4f48b636   Paul Menage   Task Control Grou...
33
  #include <linux/cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
34
  #include <linux/security.h>
a1e78772d   Mel Gorman   hugetlb: reserve ...
35
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
  #include <linux/swap.h>
  #include <linux/syscalls.h>
  #include <linux/jiffies.h>
09a05394f   Roland McGrath   tracehook: clone
39
  #include <linux/tracehook.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  #include <linux/futex.h>
8141c7f3e   Linus Torvalds   Move "exit_robust...
41
  #include <linux/compat.h>
7c3ab7381   Andrew Morton   [PATCH] io-accoun...
42
  #include <linux/task_io_accounting_ops.h>
ab2af1f50   Dipankar Sarma   [PATCH] files: fi...
43
  #include <linux/rcupdate.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44
45
46
  #include <linux/ptrace.h>
  #include <linux/mount.h>
  #include <linux/audit.h>
78fb74669   Pavel Emelianov   Memory controller...
47
  #include <linux/memcontrol.h>
f201ae235   Frederic Weisbecker   tracing/function-...
48
  #include <linux/ftrace.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
  #include <linux/profile.h>
  #include <linux/rmap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
51
  #include <linux/ksm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
  #include <linux/acct.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
53
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
54
  #include <linux/cn_proc.h>
ba96a0c88   Rafael J. Wysocki   freezer: fix vfor...
55
  #include <linux/freezer.h>
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
56
  #include <linux/delayacct.h>
ad4ecbcba   Shailabh Nagar   [PATCH] delay acc...
57
  #include <linux/taskstats_kern.h>
0a4254058   Arjan van de Ven   [PATCH] Add the c...
58
  #include <linux/random.h>
522ed7767   Miloslav Trmac   Audit: add TTY in...
59
  #include <linux/tty.h>
6f4e64335   Pavel Emelyanov   pid namespaces: i...
60
  #include <linux/proc_fs.h>
fd0928df9   Jens Axboe   ioprio: move io p...
61
  #include <linux/blkdev.h>
5ad4e53bd   Al Viro   Get rid of indire...
62
  #include <linux/fs_struct.h>
7c9f8861e   Eric Sandeen   stackprotector: u...
63
  #include <linux/magic.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
64
  #include <linux/perf_event.h>
42c4ab41a   Stanislaw Gruszka   itimers: Merge IT...
65
  #include <linux/posix-timers.h>
8e7cac798   Avi Kivity   core: Fix user re...
66
  #include <linux/user-return-notifier.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67
68
69
70
71
72
73
  
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
ad8d75fff   Steven Rostedt   tracing/events: m...
74
  #include <trace/events/sched.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
75
76
77
78
79
80
81
82
83
  /*
   * Counters protected by write_lock_irq(&tasklist_lock)
   */
  unsigned long total_forks;	/* Handle normal Linux uptimes. */
  int nr_threads; 		/* The idle threads do not count.. */
  
  int max_threads;		/* tunable limit on nr_threads */
  
  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
c59923a15   Christoph Hellwig   [PATCH] remove th...
84
  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
db1466b3e   Paul E. McKenney   rcu: Use wrapper ...
85
86
87
88
89
90
91
92
  
  #ifdef CONFIG_PROVE_RCU
  /*
   * Wrapper around lockdep_is_held() for tasklist_lock.  Only built when
   * CONFIG_PROVE_RCU is set.
   */
  int lockdep_tasklist_lock_is_held(void)
  {
  	int held = lockdep_is_held(&tasklist_lock);

  	return held;
  }
  EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
  #endif /* #ifdef CONFIG_PROVE_RCU */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
93
94
95
96
97
  
  int nr_processes(void)
  {
  	int cpu;
  	int total = 0;
1d5107509   Ian Campbell   Correct nr_proces...
98
  	for_each_possible_cpu(cpu)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99
100
101
102
103
104
105
106
  		total += per_cpu(process_counts, cpu);
  
  	return total;
  }
  
  #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
  # define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
  # define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
107
  static struct kmem_cache *task_struct_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
  #endif
b69c49b78   FUJITA Tomonori   clean up duplicat...
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
  /*
   * Generic thread_info allocator: grabs 2^THREAD_SIZE_ORDER pages with
   * GFP_KERNEL.  With CONFIG_DEBUG_STACK_USAGE the pages are also requested
   * zeroed.  @tsk is unused here.  Returns whatever __get_free_pages()
   * returned, cast to a thread_info pointer.
   */
  static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
  {
  	gfp_t gfp = GFP_KERNEL;
  	unsigned long pages;

  #ifdef CONFIG_DEBUG_STACK_USAGE
  	gfp |= __GFP_ZERO;
  #endif
  	pages = __get_free_pages(gfp, THREAD_SIZE_ORDER);

  	return (struct thread_info *)pages;
  }
  
  /* Give back the 2^THREAD_SIZE_ORDER pages that back @ti. */
  static inline void free_thread_info(struct thread_info *ti)
  {
  	unsigned long pages = (unsigned long)ti;

  	free_pages(pages, THREAD_SIZE_ORDER);
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
  /* SLAB cache for signal_struct structures (tsk->signal) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
126
  static struct kmem_cache *signal_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127
128
  
  /* SLAB cache for sighand_struct structures (tsk->sighand) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
129
  struct kmem_cache *sighand_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
130
131
  
  /* SLAB cache for files_struct structures (tsk->files) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
132
  struct kmem_cache *files_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
133
134
  
  /* SLAB cache for fs_struct structures (tsk->fs) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
135
  struct kmem_cache *fs_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
137
  
  /* SLAB cache for vm_area_struct structures */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
138
  struct kmem_cache *vm_area_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
140
  
  /* SLAB cache for mm_struct structures (tsk->mm) */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
141
  static struct kmem_cache *mm_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142

c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
143
144
145
146
147
148
  /*
   * Adjust the NR_KERNEL_STACK counter of the zone that holds @ti by
   * @account (callers in this file pass +1 on allocation and -1 on free).
   */
  static void account_kernel_stack(struct thread_info *ti, int account)
  {
  	struct page *stack_page = virt_to_page(ti);

  	mod_zone_page_state(page_zone(stack_page), NR_KERNEL_STACK, account);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149
150
  /*
   * free_task - release the memory backing a task
   * @tsk: task to free
   *
   * Tears down the per-task "dirties" state, removes the task's stack from
   * the NR_KERNEL_STACK accounting, frees the thread_info/stack, runs the
   * rt-mutex debug and ftrace-graph exit hooks and finally frees the
   * task_struct itself.  The stack accounting is undone before the stack
   * is freed because account_kernel_stack() looks up the page behind it.
   */
  void free_task(struct task_struct *tsk)
  {
3e26c149c   Peter Zijlstra   mm: dirty balanci...
151
  	prop_local_destroy_single(&tsk->dirties);
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
152
  	account_kernel_stack(tsk->stack, -1);
f7e4217b0   Roman Zippel   rename thread_inf...
153
  	free_thread_info(tsk->stack);
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
154
  	rt_mutex_debug_task_free(tsk);
fb52607af   Frederic Weisbecker   tracing/function-...
155
  	ftrace_graph_exit_task(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
157
158
  	free_task_struct(tsk);
  }
  EXPORT_SYMBOL(free_task);
158d9ebd1   Andrew Morton   [PATCH] resurrect...
159
  /*
   * __put_task_struct - final teardown of a task_struct
   * @tsk: task being released
   *
   * Warns if the task has no exit_state, still has a non-zero usage count,
   * or is the currently running task.  Then drops the task's credentials
   * and delay-accounting data.  free_task() is called only when
   * profile_handoff_task() returns 0.
   * NOTE(review): a non-zero return presumably means the profiling code has
   * taken over responsibility for freeing the task - confirm.
   */
  void __put_task_struct(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
160
  {
270f722d4   Eugene Teo   Fix tsk->exit_sta...
161
  	WARN_ON(!tsk->exit_state);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
163
  	WARN_ON(atomic_read(&tsk->usage));
  	WARN_ON(tsk == current);
e0e817392   David Howells   CRED: Add some co...
164
  	exit_creds(tsk);
35df17c57   Shailabh Nagar   [PATCH] task dela...
165
  	delayacct_tsk_free(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
166
167
168
169
  
  	if (!profile_handoff_task(tsk))
  		free_task(tsk);
  }
2adee9b30   Suresh Siddha   x86: fpu xstate s...
170
171
172
173
174
175
176
  /*
   * macro override instead of weak attribute alias, to workaround
   * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
   */
  #ifndef arch_task_cache_init
  #define arch_task_cache_init()
  #endif
61c4628b5   Suresh Siddha   x86, fpu: split F...
177

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
179
180
181
182
183
184
185
186
  /*
   * fork_init - boot-time initialisation of the fork machinery
   * @mempages: page count from which the default thread limit is derived
   *
   * Creates the task_struct slab cache (unless the architecture provides its
   * own allocator), runs the architecture task-cache hook, computes the
   * default max_threads and seeds init_task's RLIMIT_NPROC and
   * RLIMIT_SIGPENDING limits from it.
   */
  void __init fork_init(unsigned long mempages)
  {
  	/*
  	 * Number of pages spanned by eight kernel stacks.  This evaluates to
  	 * 0 when PAGE_SIZE is larger than 8 * THREAD_SIZE, so it must never
  	 * be used as a divisor without being checked first.
  	 */
  	unsigned long pages_per_8_stacks = 8 * THREAD_SIZE / PAGE_SIZE;

  #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
  #ifndef ARCH_MIN_TASKALIGN
  #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
  #endif
  	/* create a slab on which task_structs can be allocated */
  	task_struct_cachep =
  		kmem_cache_create("task_struct", sizeof(struct task_struct),
  			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
  #endif

  	/* do the arch specific task caches init */
  	arch_task_cache_init();

  	/*
  	 * The default maximum number of threads is set to a safe
  	 * value: the thread stacks can take up at most an eighth
  	 * of memory.
  	 *
  	 * On configurations with very large pages relative to the stack
  	 * size the usual divisor would be zero; scale by the number of
  	 * "eight-stack" groups that fit into one page instead.
  	 */
  	if (pages_per_8_stacks)
  		max_threads = mempages / pages_per_8_stacks;
  	else
  		max_threads = mempages * (PAGE_SIZE / (8 * THREAD_SIZE));

  	/*
  	 * we need to allow at least 20 threads to boot a system
  	 */
  	if (max_threads < 20)
  		max_threads = 20;

  	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  	/* pending-signal limit starts out equal to the process limit */
  	init_task.signal->rlim[RLIMIT_SIGPENDING] =
  		init_task.signal->rlim[RLIMIT_NPROC];
  }
61c4628b5   Suresh Siddha   x86, fpu: split F...
209
210
211
212
213
214
  /*
   * Default task_struct duplication: a plain structure copy of @src into
   * @dst that always returns 0.  Declared weak, so another definition can
   * replace it at link time.  The caller in this file treats a non-zero
   * return as failure.
   */
  int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
  					       struct task_struct *src)
  {
  	*dst = *src;
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215
216
217
218
  static struct task_struct *dup_task_struct(struct task_struct *orig)
  {
  	struct task_struct *tsk;
  	struct thread_info *ti;
7c9f8861e   Eric Sandeen   stackprotector: u...
219
  	unsigned long *stackend;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
220
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
222
223
224
225
226
227
228
229
230
231
232
  
  	prepare_to_copy(orig);
  
  	tsk = alloc_task_struct();
  	if (!tsk)
  		return NULL;
  
  	ti = alloc_thread_info(tsk);
  	if (!ti) {
  		free_task_struct(tsk);
  		return NULL;
  	}
61c4628b5   Suresh Siddha   x86, fpu: split F...
233
234
235
   	err = arch_dup_task_struct(tsk, orig);
  	if (err)
  		goto out;
f7e4217b0   Roman Zippel   rename thread_inf...
236
  	tsk->stack = ti;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
237
238
  
  	err = prop_local_init_single(&tsk->dirties);
61c4628b5   Suresh Siddha   x86, fpu: split F...
239
240
  	if (err)
  		goto out;
3e26c149c   Peter Zijlstra   mm: dirty balanci...
241

10ebffde3   Al Viro   [PATCH] m68k: int...
242
  	setup_thread_stack(tsk, orig);
8e7cac798   Avi Kivity   core: Fix user re...
243
  	clear_user_return_notifier(tsk);
7c9f8861e   Eric Sandeen   stackprotector: u...
244
245
  	stackend = end_of_stack(tsk);
  	*stackend = STACK_END_MAGIC;	/* for overflow detection */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246

0a4254058   Arjan van de Ven   [PATCH] Add the c...
247
248
249
  #ifdef CONFIG_CC_STACKPROTECTOR
  	tsk->stack_canary = get_random_int();
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
  	/* One for us, one for whoever does the "release_task()" (usually parent) */
  	atomic_set(&tsk->usage,2);
4b5d37ac0   Giancarlo Formicuccia   [PATCH] Clear tas...
252
  	atomic_set(&tsk->fs_excl, 0);
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
253
  #ifdef CONFIG_BLK_DEV_IO_TRACE
2056a782f   Jens Axboe   [PATCH] Block que...
254
  	tsk->btrace_seq = 0;
6c5c93415   Alexey Dobriyan   [PATCH] ifdef blk...
255
  #endif
a0aa7f68a   Jens Axboe   [PATCH] Don't inh...
256
  	tsk->splice_pipe = NULL;
c6a7f5728   KOSAKI Motohiro   mm: oom analysis:...
257
258
  
  	account_kernel_stack(ti, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
259
  	return tsk;
61c4628b5   Suresh Siddha   x86, fpu: split F...
260
261
262
263
264
  
  out:
  	free_thread_info(ti);
  	free_task_struct(tsk);
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
265
266
267
  }
  
  #ifdef CONFIG_MMU
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
268
  static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
269
  {
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
270
  	struct vm_area_struct *mpnt, *tmp, **pprev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
271
272
273
274
275
276
  	struct rb_node **rb_link, *rb_parent;
  	int retval;
  	unsigned long charge;
  	struct mempolicy *pol;
  
  	down_write(&oldmm->mmap_sem);
ec8c0446b   Ralf Baechle   [PATCH] Optimize ...
277
  	flush_cache_dup_mm(oldmm);
ad3394517   Ingo Molnar   [PATCH] lockdep: ...
278
279
280
281
  	/*
  	 * Not linked in yet - no deadlock potential:
  	 */
  	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
282

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
283
284
285
286
  	mm->locked_vm = 0;
  	mm->mmap = NULL;
  	mm->mmap_cache = NULL;
  	mm->free_area_cache = oldmm->mmap_base;
1363c3cd8   Wolfgang Wander   [PATCH] Avoiding ...
287
  	mm->cached_hole_size = ~0UL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
288
  	mm->map_count = 0;
948942445   Rusty Russell   cpumask: use mm_c...
289
  	cpumask_clear(mm_cpumask(mm));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
290
291
292
293
  	mm->mm_rb = RB_ROOT;
  	rb_link = &mm->mm_rb.rb_node;
  	rb_parent = NULL;
  	pprev = &mm->mmap;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
294
295
296
  	retval = ksm_fork(mm, oldmm);
  	if (retval)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297

fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
298
  	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
299
300
301
  		struct file *file;
  
  		if (mpnt->vm_flags & VM_DONTCOPY) {
3b6bfcdb1   Hugh Dickins   [PATCH] lower VM_...
302
303
  			long pages = vma_pages(mpnt);
  			mm->total_vm -= pages;
ab50b8ed8   Hugh Dickins   [PATCH] mm: vm_st...
304
  			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
3b6bfcdb1   Hugh Dickins   [PATCH] lower VM_...
305
  								-pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
306
307
308
309
310
311
312
313
314
  			continue;
  		}
  		charge = 0;
  		if (mpnt->vm_flags & VM_ACCOUNT) {
  			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
  			if (security_vm_enough_memory(len))
  				goto fail_nomem;
  			charge = len;
  		}
e94b17660   Christoph Lameter   [PATCH] slab: rem...
315
  		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
316
317
318
  		if (!tmp)
  			goto fail_nomem;
  		*tmp = *mpnt;
5beb49305   Rik van Riel   mm: change anon_v...
319
  		INIT_LIST_HEAD(&tmp->anon_vma_chain);
846a16bf0   Lee Schermerhorn   mempolicy: rename...
320
  		pol = mpol_dup(vma_policy(mpnt));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
322
323
324
  		retval = PTR_ERR(pol);
  		if (IS_ERR(pol))
  			goto fail_nomem_policy;
  		vma_set_policy(tmp, pol);
5beb49305   Rik van Riel   mm: change anon_v...
325
326
  		if (anon_vma_fork(tmp, mpnt))
  			goto fail_nomem_anon_vma_fork;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327
328
329
  		tmp->vm_flags &= ~VM_LOCKED;
  		tmp->vm_mm = mm;
  		tmp->vm_next = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
  		file = tmp->vm_file;
  		if (file) {
f3a43f3f6   Josef "Jeff" Sipek   [PATCH] kernel: c...
332
  			struct inode *inode = file->f_path.dentry->d_inode;
b88ed2059   Hugh Dickins   fix mapping_writa...
333
  			struct address_space *mapping = file->f_mapping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
335
336
  			get_file(file);
  			if (tmp->vm_flags & VM_DENYWRITE)
  				atomic_dec(&inode->i_writecount);
b88ed2059   Hugh Dickins   fix mapping_writa...
337
338
339
  			spin_lock(&mapping->i_mmap_lock);
  			if (tmp->vm_flags & VM_SHARED)
  				mapping->i_mmap_writable++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
  			tmp->vm_truncate_count = mpnt->vm_truncate_count;
b88ed2059   Hugh Dickins   fix mapping_writa...
341
342
  			flush_dcache_mmap_lock(mapping);
  			/* insert tmp into the share list, just after mpnt */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
343
  			vma_prio_tree_add(tmp, mpnt);
b88ed2059   Hugh Dickins   fix mapping_writa...
344
345
  			flush_dcache_mmap_unlock(mapping);
  			spin_unlock(&mapping->i_mmap_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
346
347
348
  		}
  
  		/*
a1e78772d   Mel Gorman   hugetlb: reserve ...
349
350
351
352
353
354
355
356
  		 * Clear hugetlb-related page reserves for children. This only
  		 * affects MAP_PRIVATE mappings. Faults generated by the child
  		 * are not guaranteed to succeed, even if read-only
  		 */
  		if (is_vm_hugetlb_page(tmp))
  			reset_vma_resv_huge_pages(tmp);
  
  		/*
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
357
  		 * Link in the new vma and copy the page table entries.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
358
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
359
360
361
362
363
364
365
366
  		*pprev = tmp;
  		pprev = &tmp->vm_next;
  
  		__vma_link_rb(mm, tmp, rb_link, rb_parent);
  		rb_link = &tmp->vm_rb.rb_right;
  		rb_parent = &tmp->vm_rb;
  
  		mm->map_count++;
0b0db14c5   Hugh Dickins   [PATCH] unpaged: ...
367
  		retval = copy_page_range(mm, oldmm, mpnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
368
369
370
371
372
373
374
  
  		if (tmp->vm_ops && tmp->vm_ops->open)
  			tmp->vm_ops->open(tmp);
  
  		if (retval)
  			goto out;
  	}
d6dd61c83   Jeremy Fitzhardinge   [PATCH] x86: PARA...
375
376
  	/* a new mm has just been created */
  	arch_dup_mmap(oldmm, mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
377
  	retval = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
378
  out:
7ee782325   Hugh Dickins   [PATCH] mm: dup_m...
379
  	up_write(&mm->mmap_sem);
fd3e42fcc   Hugh Dickins   [PATCH] mm: dup_m...
380
  	flush_tlb_mm(oldmm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
381
382
  	up_write(&oldmm->mmap_sem);
  	return retval;
5beb49305   Rik van Riel   mm: change anon_v...
383
384
  fail_nomem_anon_vma_fork:
  	mpol_put(pol);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
  fail_nomem_policy:
  	kmem_cache_free(vm_area_cachep, tmp);
  fail_nomem:
  	retval = -ENOMEM;
  	vm_unacct_memory(charge);
  	goto out;
  }
  
  /*
   * Allocate a page directory for @mm and store it in mm->pgd.
   * Returns 0 on success, -ENOMEM if pgd_alloc() returned NULL.
   */
  static inline int mm_alloc_pgd(struct mm_struct * mm)
  {
  	mm->pgd = pgd_alloc(mm);

  	return unlikely(!mm->pgd) ? -ENOMEM : 0;
  }
  
  /* Hand the page directory stored in mm->pgd back through pgd_free(). */
  static inline void mm_free_pgd(struct mm_struct * mm)
  {
5e5419734   Benjamin Herrenschmidt   add mm argument t...
403
  	pgd_free(mm, mm->pgd);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404
405
406
407
408
409
  }
  #else
  #define dup_mmap(mm, oldmm)	(0)
  #define mm_alloc_pgd(mm)	(0)
  #define mm_free_pgd(mm)
  #endif /* CONFIG_MMU */
23ff44402   Daniel Walker   whitespace fixes:...
410
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
411

e94b17660   Christoph Lameter   [PATCH] slab: rem...
412
  #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
  #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
4cb0e11b1   Hidehiro Kawai   coredump_filter: ...
414
415
416
417
418
419
420
421
422
423
424
  static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
  
  /*
   * Handler for the "coredump_filter=" setup option: parse @s as a number
   * (base auto-detected), move it into the dump-filter bit positions, mask
   * off anything outside MMF_DUMP_FILTER_MASK and store it as the default
   * filter.  Always returns 1.
   */
  static int __init coredump_filter_setup(char *s)
  {
  	unsigned long filter = simple_strtoul(s, NULL, 0);

  	filter <<= MMF_DUMP_FILTER_SHIFT;
  	filter &= MMF_DUMP_FILTER_MASK;
  	default_dump_filter = filter;

  	return 1;
  }
  
  __setup("coredump_filter=", coredump_filter_setup);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
  #include <linux/init_task.h>
858f09930   Alexey Dobriyan   aio: ifdef fields...
426
427
428
429
430
431
432
  /*
   * Initialise the AIO context list and its lock in @mm.  Compiles to an
   * empty function when CONFIG_AIO is not set.
   */
  static void mm_init_aio(struct mm_struct *mm)
  {
  #ifdef CONFIG_AIO
  	INIT_HLIST_HEAD(&mm->ioctx_list);
  	spin_lock_init(&mm->ioctx_lock);
  #endif
  }
78fb74669   Pavel Emelianov   Memory controller...
433
  static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434
435
436
437
438
  {
  	atomic_set(&mm->mm_users, 1);
  	atomic_set(&mm->mm_count, 1);
  	init_rwsem(&mm->mmap_sem);
  	INIT_LIST_HEAD(&mm->mmlist);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
439
440
  	mm->flags = (current->mm) ?
  		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
999d9fc16   Oleg Nesterov   coredump: move mm...
441
  	mm->core_state = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
  	mm->nr_ptes = 0;
d559db086   KAMEZAWA Hiroyuki   mm: clean up mm_c...
443
  	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
  	spin_lock_init(&mm->page_table_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
445
  	mm->free_area_cache = TASK_UNMAPPED_BASE;
1363c3cd8   Wolfgang Wander   [PATCH] Avoiding ...
446
  	mm->cached_hole_size = ~0UL;
858f09930   Alexey Dobriyan   aio: ifdef fields...
447
  	mm_init_aio(mm);
cf475ad28   Balbir Singh   cgroups: add an o...
448
  	mm_init_owner(mm, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
450
451
  
  	if (likely(!mm_alloc_pgd(mm))) {
  		mm->def_flags = 0;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
452
  		mmu_notifier_mm_init(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
453
454
  		return mm;
  	}
78fb74669   Pavel Emelianov   Memory controller...
455

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
456
457
458
459
460
461
462
463
464
465
466
467
468
469
  	free_mm(mm);
  	return NULL;
  }
  
  /*
   * Allocate and initialize an mm_struct.
   */
  struct mm_struct * mm_alloc(void)
  {
  	struct mm_struct * mm;
  
  	mm = allocate_mm();
  	if (mm) {
  		memset(mm, 0, sizeof(*mm));
78fb74669   Pavel Emelianov   Memory controller...
470
  		mm = mm_init(mm, current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
472
473
474
475
476
477
478
479
  	}
  	return mm;
  }
  
  /*
   * Called when the last reference to the mm
   * is dropped: either by a lazy thread or by
   * mmput. Free the page directory and the mm.
   */
7ad5b3a50   Harvey Harrison   kernel: remove fa...
480
  void __mmdrop(struct mm_struct *mm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
481
482
483
484
  {
  	/* init_mm must never be dropped */
  	BUG_ON(mm == &init_mm);
  	/* release the page directory, then the context and notifier state */
  	mm_free_pgd(mm);
  	destroy_context(mm);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
485
  	mmu_notifier_mm_destroy(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
486
487
  	/* finally give the mm_struct itself back to its slab cache */
  	free_mm(mm);
  }
6d4e4c4fc   Avi Kivity   KVM: Disallow for...
488
  EXPORT_SYMBOL_GPL(__mmdrop);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
489
490
491
492
493
494
  
  /*
   * Decrement the use count and release all resources for an mm.
   */
  void mmput(struct mm_struct *mm)
  {
0ae26f1b3   Andrew Morton   [PATCH] mmput() m...
495
  	might_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
496
497
  	if (atomic_dec_and_test(&mm->mm_users)) {
  		exit_aio(mm);
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
498
  		ksm_exit(mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
499
  		exit_mmap(mm);
925d1c401   Matt Helsley   procfs task exe s...
500
  		set_mm_exe_file(mm, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
501
502
503
504
505
506
  		if (!list_empty(&mm->mmlist)) {
  			spin_lock(&mmlist_lock);
  			list_del(&mm->mmlist);
  			spin_unlock(&mmlist_lock);
  		}
  		put_swap_token(mm);
801460d0c   Hiroshi Shimamoto   task_struct clean...
507
508
  		if (mm->binfmt)
  			module_put(mm->binfmt->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
509
510
511
512
513
514
515
516
  		mmdrop(mm);
  	}
  }
  EXPORT_SYMBOL_GPL(mmput);
  
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
517
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
518
519
520
521
522
523
524
525
526
527
528
529
   * this kernel workthread has transiently adopted a user mm with use_mm,
   * to do its AIO) is not set and if so returns a reference to it, after
   * bumping up the use count.  User must release the mm via mmput()
   * after use.  Typically used by /proc and ptrace.
   */
  struct mm_struct *get_task_mm(struct task_struct *task)
  {
  	struct mm_struct *mm;
  
  	task_lock(task);
  	mm = task->mm;
  	if (mm) {
246bb0b1d   Oleg Nesterov   kill PF_BORROWED_...
530
  		if (task->flags & PF_KTHREAD)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
  			mm = NULL;
  		else
  			atomic_inc(&mm->mm_users);
  	}
  	task_unlock(task);
  	return mm;
  }
  EXPORT_SYMBOL_GPL(get_task_mm);
  
  /* Please note the differences between mmput and mm_release.
   * mmput is called whenever we stop holding onto a mm_struct,
   * whether on error or on success.
   *
   * mm_release is called after a mm_struct has been removed
   * from the current process.
   *
   * This difference is important for error handling, when we
   * only half set up a mm_struct for a new process and need to restore
   * the old one.  Because we mmput the new mm_struct before
   * restoring the old one. . .
   * Eric Biederman 10 January 1998
   */
  void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  {
  	/* read once up front; completed further down after deactivate_mm() */
  	struct completion *vfork_done = tsk->vfork_done;
8141c7f3e   Linus Torvalds   Move "exit_robust...
556
557
  	/* Get rid of any futexes when releasing the mm */
  #ifdef CONFIG_FUTEX
fc6b177de   Peter Zijlstra   futex: Nullify ro...
558
  	if (unlikely(tsk->robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
559
  		exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
560
561
  		tsk->robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
562
  #ifdef CONFIG_COMPAT
fc6b177de   Peter Zijlstra   futex: Nullify ro...
563
  	if (unlikely(tsk->compat_robust_list)) {
8141c7f3e   Linus Torvalds   Move "exit_robust...
564
  		compat_exit_robust_list(tsk);
fc6b177de   Peter Zijlstra   futex: Nullify ro...
565
566
  		tsk->compat_robust_list = NULL;
  	}
8141c7f3e   Linus Torvalds   Move "exit_robust...
567
  #endif
322a2c100   Thomas Gleixner   futex: Move exit_...
568
569
  	if (unlikely(!list_empty(&tsk->pi_state_list)))
  		exit_pi_state_list(tsk);
8141c7f3e   Linus Torvalds   Move "exit_robust...
570
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
571
572
573
574
575
576
577
578
  	/* Get rid of any cached register state */
  	deactivate_mm(tsk, mm);
  
  	/* notify parent sleeping on vfork() */
  	if (vfork_done) {
  		/* clear the task's pointer before signalling the completion */
  		tsk->vfork_done = NULL;
  		complete(vfork_done);
  	}
fec1d0115   Roland McGrath   [PATCH] Disable C...
579
580
581
582
583
584
585
  
  	/*
  	 * If we're exiting normally, clear a user-space tid field if
  	 * requested.  We leave this alone when dying by signal, to leave
  	 * the value intact in a core dump, and to save the unnecessary
  	 * trouble otherwise.  Userland only wants this done for a sys_exit.
  	 */
9c8a8228d   Eric Dumazet   execve: must clea...
586
587
588
589
590
591
592
593
594
595
596
  	if (tsk->clear_child_tid) {
  		/*
  		 * Only write and wake when the mm still has more than one
  		 * user.  NOTE(review): mm is dereferenced here without a NULL
  		 * check - confirm callers never pass a NULL mm for a task that
  		 * has clear_child_tid set.
  		 */
  		if (!(tsk->flags & PF_SIGNALED) &&
  		    atomic_read(&mm->mm_users) > 1) {
  			/*
  			 * We don't check the error code - if userspace has
  			 * not set up a proper pointer then tough luck.
  			 */
  			put_user(0, tsk->clear_child_tid);
  			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
  					1, NULL, NULL, 0);
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
597
  		/* the pointer is dropped whether or not the write happened */
  		tsk->clear_child_tid = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
598
599
  	}
  }
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
600
601
602
603
  /*
   * Allocate a new mm structure and copy contents from the
   * mm structure of the passed in task structure.
   */
402b08622   Carsten Otte   s390: KVM prepara...
604
  /*
   * Note: the mm that gets copied is current->mm; @tsk is only passed on to
   * mm_init() and init_new_context().  Returns the new mm, or NULL when
   * current has no mm or any step of the duplication fails.
   */
  struct mm_struct *dup_mm(struct task_struct *tsk)
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
605
606
607
608
609
610
611
612
613
614
615
616
  {
  	struct mm_struct *mm, *oldmm = current->mm;
  	int err;
  
  	if (!oldmm)
  		return NULL;
  
  	mm = allocate_mm();
  	if (!mm)
  		goto fail_nomem;
  
  	/* start from a byte copy of the old mm, then reset what must differ */
  	memcpy(mm, oldmm, sizeof(*mm));
7602bdf2f   Ashwin Chaugule   [PATCH] new schem...
617
618
619
  	/* Initializing for Swap token stuff */
  	mm->token_priority = 0;
  	mm->last_interval = 0;
78fb74669   Pavel Emelianov   Memory controller...
620
  	/* mm_init() frees mm itself on failure, so nothing to free here */
  	if (!mm_init(mm, tsk))
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
621
622
623
624
  		goto fail_nomem;
  
  	if (init_new_context(tsk, mm))
  		goto fail_nocontext;
925d1c401   Matt Helsley   procfs task exe s...
625
  	dup_mm_exe_file(oldmm, mm);
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
626
627
628
629
630
631
  	err = dup_mmap(mm, oldmm);
  	if (err)
  		goto free_pt;
  
  	/* high-water marks start at the freshly copied sizes */
  	mm->hiwater_rss = get_mm_rss(mm);
  	mm->hiwater_vm = mm->total_vm;
801460d0c   Hiroshi Shimamoto   task_struct clean...
632
633
  	/* take a module reference for the binfmt pointer inherited via memcpy */
  	if (mm->binfmt && !try_module_get(mm->binfmt->module))
  		goto free_pt;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
634
635
636
  	return mm;
  
  free_pt:
801460d0c   Hiroshi Shimamoto   task_struct clean...
637
638
  	/* don't put binfmt in mmput, we haven't got module yet */
  	mm->binfmt = NULL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
639
640
641
642
643
644
645
646
647
648
649
650
651
652
  	mmput(mm);
  
  fail_nomem:
  	return NULL;
  
  fail_nocontext:
  	/*
  	 * If init_new_context() failed, we cannot use mmput() to free the mm
  	 * because it calls destroy_context()
  	 */
  	mm_free_pgd(mm);
  	free_mm(mm);
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
653
654
655
656
657
658
659
  /*
   * copy_mm - give the new task @tsk its address space
   * @clone_flags: clone flags; CLONE_VM selects sharing over copying
   * @tsk:         task being set up
   *
   * Resets the fault and context-switch counters of @tsk, then either
   * leaves it without an mm (current has none), shares current's mm
   * (CLONE_VM, taking an extra mm_users reference) or duplicates it with
   * dup_mm().
   *
   * Returns 0 on success or -ENOMEM if the mm could not be duplicated.
   */
  static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
  {
  	struct mm_struct *parent_mm, *child_mm;

  	tsk->maj_flt = 0;
  	tsk->min_flt = 0;
  	tsk->nivcsw = 0;
  	tsk->nvcsw = 0;
  #ifdef CONFIG_DETECT_HUNG_TASK
  	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  #endif

  	tsk->mm = NULL;
  	tsk->active_mm = NULL;

  	/*
  	 * Are we cloning a kernel thread?
  	 *
  	 * We need to steal an active VM for that..
  	 */
  	parent_mm = current->mm;
  	if (parent_mm == NULL)
  		return 0;

  	if (clone_flags & CLONE_VM) {
  		/* share the parent's address space */
  		atomic_inc(&parent_mm->mm_users);
  		child_mm = parent_mm;
  	} else {
  		child_mm = dup_mm(tsk);
  		if (child_mm == NULL)
  			return -ENOMEM;
  	}

  	/* Initializing for Swap token stuff */
  	child_mm->token_priority = 0;
  	child_mm->last_interval = 0;

  	tsk->mm = child_mm;
  	tsk->active_mm = child_mm;

  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
696
  static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
  {
498052bba   Al Viro   New locking/refco...
698
  	struct fs_struct *fs = current->fs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
699
  	if (clone_flags & CLONE_FS) {
498052bba   Al Viro   New locking/refco...
700
701
702
703
704
705
706
707
  		/* tsk->fs is already what we want */
  		write_lock(&fs->lock);
  		if (fs->in_exec) {
  			write_unlock(&fs->lock);
  			return -EAGAIN;
  		}
  		fs->users++;
  		write_unlock(&fs->lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
709
  		return 0;
  	}
498052bba   Al Viro   New locking/refco...
710
  	tsk->fs = copy_fs_struct(fs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
711
712
713
714
  	if (!tsk->fs)
  		return -ENOMEM;
  	return 0;
  }
a016f3389   JANAK DESAI   [PATCH] unshare s...
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
  /*
   * Give @tsk its open-file table.
   *
   * If the caller has no files_struct there is nothing to do.  With
   * CLONE_FILES the caller's table is shared and its reference count is
   * raised; otherwise dup_fd() creates a copy that is installed in
   * @tsk->files.
   *
   * Returns 0 on success, or the error code reported by dup_fd() when it
   * fails to produce a copy.
   */
  static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
  {
  	struct files_struct *parent_files = current->files;
  	struct files_struct *copy;
  	int err = 0;

  	/* A background process may not have any files ... */
  	if (!parent_files)
  		return 0;

  	if (clone_flags & CLONE_FILES) {
  		atomic_inc(&parent_files->count);
  		return 0;
  	}

  	copy = dup_fd(parent_files, &err);
  	if (!copy)
  		return err;

  	tsk->files = copy;
  	return 0;
  }
fadad878c   Jens Axboe   kernel: add CLONE...
740
  /*
   * Set up the I/O context of @tsk (only when CONFIG_BLOCK is enabled).
   *
   * Nothing is done if the caller has no io_context.  With CLONE_IO the
   * caller's context is linked into the child via ioc_task_link().
   * Otherwise, if the caller's I/O priority is valid, a new context is
   * allocated and inherits that priority.
   *
   * Returns 0 on success, -ENOMEM if linking or allocating fails.
   */
  static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
  {
  #ifdef CONFIG_BLOCK
  	struct io_context *parent_ioc = current->io_context;
  	struct io_context *new_ioc;

  	if (!parent_ioc)
  		return 0;

  	/* Share io context with parent, if CLONE_IO is set */
  	if (clone_flags & CLONE_IO) {
  		new_ioc = ioc_task_link(parent_ioc);
  		tsk->io_context = new_ioc;
  		if (unlikely(!new_ioc))
  			return -ENOMEM;
  		return 0;
  	}

  	/* No valid priority to inherit: leave the child without a context. */
  	if (!ioprio_valid(parent_ioc->ioprio))
  		return 0;

  	new_ioc = alloc_io_context(GFP_KERNEL, -1);
  	tsk->io_context = new_ioc;
  	if (unlikely(!new_ioc))
  		return -ENOMEM;

  	new_ioc->ioprio = parent_ioc->ioprio;
  #endif
  	return 0;
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
763
  static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
764
765
  {
  	struct sighand_struct *sig;
60348802e   Zhaolei   fork.c: cleanup f...
766
  	if (clone_flags & CLONE_SIGHAND) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
767
768
769
770
  		atomic_inc(&current->sighand->count);
  		return 0;
  	}
  	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
e56d09031   Ingo Molnar   [PATCH] RCU signa...
771
  	rcu_assign_pointer(tsk->sighand, sig);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
772
773
  	if (!sig)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
774
775
776
777
  	atomic_set(&sig->count, 1);
  	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
  	return 0;
  }
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
778
  void __cleanup_sighand(struct sighand_struct *sighand)
c81addc9d   Oleg Nesterov   [PATCH] rename __...
779
  {
c81addc9d   Oleg Nesterov   [PATCH] rename __...
780
781
782
  	if (atomic_dec_and_test(&sighand->count))
  		kmem_cache_free(sighand_cachep, sighand);
  }
f06febc96   Frank Mayhar   timers: fix itime...
783
784
785
786
787
788
  
  /*
   * Initialize POSIX timer handling for a thread group.
   */
  static void posix_cpu_timers_init_group(struct signal_struct *sig)
  {
78d7d407b   Jiri Slaby   kernel core: use ...
789
  	unsigned long cpu_limit;
f06febc96   Frank Mayhar   timers: fix itime...
790
791
  	/* Thread group counters. */
  	thread_group_cputime_init(sig);
78d7d407b   Jiri Slaby   kernel core: use ...
792
793
794
  	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
  	if (cpu_limit != RLIM_INFINITY) {
  		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
6279a751f   Oleg Nesterov   posix-timers: fix...
795
796
  		sig->cputimer.running = 1;
  	}
f06febc96   Frank Mayhar   timers: fix itime...
797
798
799
800
801
  	/* The timer lists. */
  	INIT_LIST_HEAD(&sig->cpu_timers[0]);
  	INIT_LIST_HEAD(&sig->cpu_timers[1]);
  	INIT_LIST_HEAD(&sig->cpu_timers[2]);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
802
  static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
803
804
  {
  	struct signal_struct *sig;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
805

4ab6c0833   Oleg Nesterov   clone(): fix race...
806
  	if (clone_flags & CLONE_THREAD)
490dea45d   Peter Zijlstra   itimers: remove t...
807
  		return 0;
490dea45d   Peter Zijlstra   itimers: remove t...
808

a56704ef6   Veaceslav Falico   copy_signal() cle...
809
  	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
810
811
812
  	tsk->signal = sig;
  	if (!sig)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
813
814
815
  	atomic_set(&sig->count, 1);
  	atomic_set(&sig->live, 1);
  	init_waitqueue_head(&sig->wait_chldexit);
b3bfa0cba   Sukadev Bhattiprolu   signals: protect ...
816
817
  	if (clone_flags & CLONE_NEWPID)
  		sig->flags |= SIGNAL_UNKILLABLE;
db51aeccd   Oleg Nesterov   signals: microopt...
818
  	sig->curr_target = tsk;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
819
820
  	init_sigpending(&sig->shared_pending);
  	INIT_LIST_HEAD(&sig->posix_timers);
c9cb2e3d7   Thomas Gleixner   [PATCH] hrtimers:...
821
  	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
822
  	sig->real_timer.function = it_real_fn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
824
825
826
  	task_lock(current->group_leader);
  	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
  	task_unlock(current->group_leader);
6279a751f   Oleg Nesterov   posix-timers: fix...
827
  	posix_cpu_timers_init_group(sig);
522ed7767   Miloslav Trmac   Audit: add TTY in...
828
  	tty_audit_fork(sig);
28b83c519   KOSAKI Motohiro   oom: move oom_adj...
829
  	sig->oom_adj = current->signal->oom_adj;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
830
831
  	return 0;
  }
6b3934ef5   Oleg Nesterov   [PATCH] copy_proc...
832
833
  void __cleanup_signal(struct signal_struct *sig)
  {
f06febc96   Frank Mayhar   timers: fix itime...
834
  	thread_group_cputime_free(sig);
9c9f4ded9   Alan Cox   tty: Add a kref c...
835
  	tty_kref_put(sig->tty);
6b3934ef5   Oleg Nesterov   [PATCH] copy_proc...
836
837
  	kmem_cache_free(signal_cachep, sig);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
838
  static void copy_flags(unsigned long clone_flags, struct task_struct *p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839
840
  {
  	unsigned long new_flags = p->flags;
831441862   Rafael J. Wysocki   Freezer: make ker...
841
  	new_flags &= ~PF_SUPERPRIV;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
842
  	new_flags |= PF_FORKNOEXEC;
09a05394f   Roland McGrath   tracehook: clone
843
  	new_flags |= PF_STARTING;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
844
  	p->flags = new_flags;
2e1318956   Rafael J. Wysocki   freezer: prevent ...
845
  	clear_freeze_flag(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
846
  }
17da2bd90   Heiko Carstens   [CVE-2009-0029] S...
847
  /*
   * set_tid_address(2): record @tidptr as the calling task's
   * clear_child_tid pointer and return the caller's pid as computed by
   * task_pid_vnr().
   */
  SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
  {
  	struct task_struct *self = current;

  	self->clear_child_tid = tidptr;

  	return task_pid_vnr(self);
  }
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
852
  static void rt_mutex_init_task(struct task_struct *p)
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
853
  {
1d6154825   Thomas Gleixner   sched: Convert pi...
854
  	raw_spin_lock_init(&p->pi_lock);
e29e175b0   Zilvinas Valinskas   [PATCH] initialis...
855
  #ifdef CONFIG_RT_MUTEXES
1d6154825   Thomas Gleixner   sched: Convert pi...
856
  	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
857
  	p->pi_blocked_on = NULL;
23f78d4a0   Ingo Molnar   [PATCH] pi-futex:...
858
859
  #endif
  }
cf475ad28   Balbir Singh   cgroups: add an o...
860
861
862
863
864
865
  #ifdef CONFIG_MM_OWNER
  /*
   * Record task @p as the owner of @mm.  Only built when CONFIG_MM_OWNER
   * is enabled.
   */
  void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  {
  	mm->owner = p;
  }
  #endif /* CONFIG_MM_OWNER */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
866
  /*
   * Initialize POSIX CPU-timer state for the single task @tsk: all three
   * expiry values are zeroed and the three timer lists are made empty.
   */
  static void posix_cpu_timers_init(struct task_struct *tsk)
  {
  	int i;

  	tsk->cputime_expires.prof_exp = cputime_zero;
  	tsk->cputime_expires.virt_exp = cputime_zero;
  	tsk->cputime_expires.sched_exp = 0;

  	for (i = 0; i < 3; i++)
  		INIT_LIST_HEAD(&tsk->cpu_timers[i]);
  }
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
880
881
882
883
884
885
886
   * This creates a new process as a copy of the old one,
   * but does not actually start it yet.
   *
   * It copies the registers, and all the appropriate
   * parts of the process environment (as per the clone
   * flags). The actual kick-off is left to the caller.
   */
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
887
888
889
890
  static struct task_struct *copy_process(unsigned long clone_flags,
  					unsigned long stack_start,
  					struct pt_regs *regs,
  					unsigned long stack_size,
36c8b5868   Ingo Molnar   [PATCH] sched: cl...
891
  					int __user *child_tidptr,
09a05394f   Roland McGrath   tracehook: clone
892
893
  					struct pid *pid,
  					int trace)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894
895
  {
  	int retval;
a24efe62d   Mariusz Kozlowski   kernel/fork.c: re...
896
  	struct task_struct *p;
b4f48b636   Paul Menage   Task Control Grou...
897
  	int cgroup_callbacks_done = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
  
  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Thread groups must share signals as well, and detached threads
  	 * can only be started up within the thread group.
  	 */
  	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
  		return ERR_PTR(-EINVAL);
  
  	/*
  	 * Shared signal handlers imply shared VM. By way of the above,
  	 * thread groups also imply shared VM. Blocking this case allows
  	 * for various simplifications in other code.
  	 */
  	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
  		return ERR_PTR(-EINVAL);
123be07b0   Sukadev Bhattiprolu   fork(): disable C...
916
917
918
919
920
921
922
923
924
  	/*
  	 * Siblings of global init remain as zombies on exit since they are
  	 * not reaped by their parent (swapper). To solve this and to avoid
  	 * multi-rooted process trees, prevent global and container-inits
  	 * from creating siblings.
  	 */
  	if ((clone_flags & CLONE_PARENT) &&
  				current->signal->flags & SIGNAL_UNKILLABLE)
  		return ERR_PTR(-EINVAL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
925
926
927
928
929
930
931
932
  	retval = security_task_create(clone_flags);
  	if (retval)
  		goto fork_out;
  
  	retval = -ENOMEM;
  	p = dup_task_struct(current);
  	if (!p)
  		goto fork_out;
f7e8b616e   Steven Rostedt   function-graph: m...
933
  	ftrace_graph_init_task(p);
bea493a03   Peter Zijlstra   [PATCH] rt-mutex:...
934
  	rt_mutex_init_task(p);
d12c1a379   Ingo Molnar   lockdep: fix kern...
935
  #ifdef CONFIG_PROVE_LOCKING
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
936
937
938
  	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
  	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
939
  	retval = -EAGAIN;
3b11a1dec   David Howells   CRED: Differentia...
940
  	if (atomic_read(&p->real_cred->user->processes) >=
78d7d407b   Jiri Slaby   kernel core: use ...
941
  			task_rlimit(p, RLIMIT_NPROC)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
942
  		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
18b6e0414   Serge Hallyn   User namespaces: ...
943
  		    p->real_cred->user != INIT_USER)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
944
945
  			goto bad_fork_free;
  	}
f1752eec6   David Howells   CRED: Detach the ...
946
947
948
  	retval = copy_creds(p, clone_flags);
  	if (retval < 0)
  		goto bad_fork_free;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
949
950
951
952
953
954
  
  	/*
  	 * If multiple threads are within copy_process(), then this check
  	 * triggers too late. This doesn't hurt, the check is only there
  	 * to stop root fork bombs.
  	 */
04ec93fe9   Li Zefan   fork.c: fix NULL ...
955
  	retval = -EAGAIN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956
957
  	if (nr_threads >= max_threads)
  		goto bad_fork_cleanup_count;
a1261f546   Al Viro   [PATCH] m68k: int...
958
  	if (!try_module_get(task_thread_info(p)->exec_domain->module))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959
  		goto bad_fork_cleanup_count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
960
  	p->did_exec = 0;
ca74e92b4   Shailabh Nagar   [PATCH] per-task-...
961
  	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
962
  	copy_flags(clone_flags, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
963
964
  	INIT_LIST_HEAD(&p->children);
  	INIT_LIST_HEAD(&p->sibling);
f41d911f8   Paul E. McKenney   rcu: Merge preemp...
965
  	rcu_copy_process(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
966
967
  	p->vfork_done = NULL;
  	spin_lock_init(&p->alloc_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
968

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
969
970
971
972
  	init_sigpending(&p->pending);
  
  	p->utime = cputime_zero;
  	p->stime = cputime_zero;
9ac52315d   Laurent Vivier   sched: guest CPU ...
973
  	p->gtime = cputime_zero;
c66f08be7   Michael Neuling   Add scaled time t...
974
975
  	p->utimescaled = cputime_zero;
  	p->stimescaled = cputime_zero;
d99ca3b97   Hidetoshi Seto   sched, cputime: C...
976
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING
73a2bcb0e   Peter Zijlstra   sched: keep utime...
977
  	p->prev_utime = cputime_zero;
9301899be   Balbir Singh   sched: fix /proc/...
978
  	p->prev_stime = cputime_zero;
d99ca3b97   Hidetoshi Seto   sched, cputime: C...
979
  #endif
a3a2e76c7   KAMEZAWA Hiroyuki   mm: avoid null-po...
980
981
982
  #if defined(SPLIT_RSS_COUNTING)
  	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
  #endif
172ba844a   Balbir Singh   sched: update del...
983

6976675d9   Arjan van de Ven   hrtimer: create a...
984
  	p->default_timer_slack_ns = current->timer_slack_ns;
5995477ab   Andrea Righi   task IO accountin...
985
  	task_io_accounting_init(&p->ioac);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986
  	acct_clear_integrals(p);
f06febc96   Frank Mayhar   timers: fix itime...
987
  	posix_cpu_timers_init(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
988
989
990
  
  	p->lock_depth = -1;		/* -1 = no lock */
  	do_posix_clock_monotonic_gettime(&p->start_time);
924b42d5a   Tomas Janousek   Use boot based ti...
991
992
  	p->real_start_time = p->start_time;
  	monotonic_to_bootbased(&p->real_start_time);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
993
  	p->io_context = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
994
  	p->audit_context = NULL;
b4f48b636   Paul Menage   Task Control Grou...
995
  	cgroup_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
996
  #ifdef CONFIG_NUMA
846a16bf0   Lee Schermerhorn   mempolicy: rename...
997
  	p->mempolicy = mpol_dup(p->mempolicy);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
998
999
1000
   	if (IS_ERR(p->mempolicy)) {
   		retval = PTR_ERR(p->mempolicy);
   		p->mempolicy = NULL;
b4f48b636   Paul Menage   Task Control Grou...
1001
   		goto bad_fork_cleanup_cgroup;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
   	}
c61afb181   Paul Jackson   [PATCH] cpuset me...
1003
  	mpol_fix_fork_child_flag(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1004
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1005
1006
  #ifdef CONFIG_TRACE_IRQFLAGS
  	p->irq_events = 0;
b36e4758d   Russell King   [ARM] Fix kernel/...
1007
1008
1009
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
  	p->hardirqs_enabled = 1;
  #else
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1010
  	p->hardirqs_enabled = 0;
b36e4758d   Russell King   [ARM] Fix kernel/...
1011
  #endif
de30a2b35   Ingo Molnar   [PATCH] lockdep: ...
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
  	p->hardirq_enable_ip = 0;
  	p->hardirq_enable_event = 0;
  	p->hardirq_disable_ip = _THIS_IP_;
  	p->hardirq_disable_event = 0;
  	p->softirqs_enabled = 1;
  	p->softirq_enable_ip = _THIS_IP_;
  	p->softirq_enable_event = 0;
  	p->softirq_disable_ip = 0;
  	p->softirq_disable_event = 0;
  	p->hardirq_context = 0;
  	p->softirq_context = 0;
  #endif
fbb9ce953   Ingo Molnar   [PATCH] lockdep: ...
1024
1025
1026
1027
1028
  #ifdef CONFIG_LOCKDEP
  	p->lockdep_depth = 0; /* no locks held yet */
  	p->curr_chain_key = 0;
  	p->lockdep_recursion = 0;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1029

408894ee4   Ingo Molnar   [PATCH] mutex sub...
1030
1031
1032
  #ifdef CONFIG_DEBUG_MUTEXES
  	p->blocked_on = NULL; /* not blocked yet */
  #endif
569b846df   KAMEZAWA Hiroyuki   memcg: coalesce u...
1033
1034
1035
1036
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  	p->memcg_batch.do_batch = 0;
  	p->memcg_batch.memcg = NULL;
  #endif
0f4814065   Markus Metzger   x86, ptrace: add ...
1037
1038
  
  	p->bts = NULL;
408894ee4   Ingo Molnar   [PATCH] mutex sub...
1039

d899bf7b5   Stefani Seibold   procfs: provide s...
1040
  	p->stack_start = stack_start;
3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1041
1042
  	/* Perform scheduler related setup. Assign this task to a CPU. */
  	sched_fork(p, clone_flags);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1043

cdd6c482c   Ingo Molnar   perf: Do the big ...
1044
  	retval = perf_event_init_task(p);
6ab423e0e   Peter Zijlstra   perf_counter: Pro...
1045
1046
  	if (retval)
  		goto bad_fork_cleanup_policy;
3c90e6e99   Srivatsa Vaddagiri   sched: fix copy_n...
1047

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1048
  	if ((retval = audit_alloc(p)))
f1752eec6   David Howells   CRED: Detach the ...
1049
  		goto bad_fork_cleanup_policy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
  	/* copy all the process information */
  	if ((retval = copy_semundo(clone_flags, p)))
  		goto bad_fork_cleanup_audit;
  	if ((retval = copy_files(clone_flags, p)))
  		goto bad_fork_cleanup_semundo;
  	if ((retval = copy_fs(clone_flags, p)))
  		goto bad_fork_cleanup_files;
  	if ((retval = copy_sighand(clone_flags, p)))
  		goto bad_fork_cleanup_fs;
  	if ((retval = copy_signal(clone_flags, p)))
  		goto bad_fork_cleanup_sighand;
  	if ((retval = copy_mm(clone_flags, p)))
  		goto bad_fork_cleanup_signal;
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1063
  	if ((retval = copy_namespaces(clone_flags, p)))
d84f4f992   David Howells   CRED: Inaugurate ...
1064
  		goto bad_fork_cleanup_mm;
fadad878c   Jens Axboe   kernel: add CLONE...
1065
  	if ((retval = copy_io(clone_flags, p)))
fd0928df9   Jens Axboe   ioprio: move io p...
1066
  		goto bad_fork_cleanup_namespaces;
6f2c55b84   Alexey Dobriyan   Simplify copy_thr...
1067
  	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1068
  	if (retval)
fd0928df9   Jens Axboe   ioprio: move io p...
1069
  		goto bad_fork_cleanup_io;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1070

425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1071
1072
  	if (pid != &init_struct_pid) {
  		retval = -ENOMEM;
61bce0f13   Eric W. Biederman   pid: generalize t...
1073
  		pid = alloc_pid(p->nsproxy->pid_ns);
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1074
  		if (!pid)
fd0928df9   Jens Axboe   ioprio: move io p...
1075
  			goto bad_fork_cleanup_io;
6f4e64335   Pavel Emelyanov   pid namespaces: i...
1076
1077
  
  		if (clone_flags & CLONE_NEWPID) {
61bce0f13   Eric W. Biederman   pid: generalize t...
1078
  			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
6f4e64335   Pavel Emelyanov   pid namespaces: i...
1079
1080
1081
  			if (retval < 0)
  				goto bad_fork_free_pid;
  		}
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1082
1083
1084
1085
1086
1087
  	}
  
  	p->pid = pid_nr(pid);
  	p->tgid = p->pid;
  	if (clone_flags & CLONE_THREAD)
  		p->tgid = current->tgid;
e885dcde7   Serge E. Hallyn   cgroup_clone: use...
1088
1089
1090
  	if (current->nsproxy != p->nsproxy) {
  		retval = ns_cgroup_clone(p, pid);
  		if (retval)
f7e8b616e   Steven Rostedt   function-graph: m...
1091
  			goto bad_fork_free_pid;
e885dcde7   Serge E. Hallyn   cgroup_clone: use...
1092
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
1094
1095
1096
1097
  	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
  	/*
  	 * Clear TID on mm_release()?
  	 */
  	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1098
  #ifdef CONFIG_FUTEX
8f17d3a50   Ingo Molnar   [PATCH] lightweig...
1099
1100
1101
1102
  	p->robust_list = NULL;
  #ifdef CONFIG_COMPAT
  	p->compat_robust_list = NULL;
  #endif
c87e2837b   Ingo Molnar   [PATCH] pi-futex:...
1103
1104
  	INIT_LIST_HEAD(&p->pi_state_list);
  	p->pi_state_cache = NULL;
42b2dd0a0   Alexey Dobriyan   Shrink task_struc...
1105
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1106
  	/*
f9a3879ab   GOTO Masanori   [PATCH] Fix sigal...
1107
1108
1109
1110
1111
1112
  	 * sigaltstack should be cleared when sharing the same VM
  	 */
  	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
  		p->sas_ss_sp = p->sas_ss_size = 0;
  
  	/*
6580807da   Oleg Nesterov   ptrace: copy_proc...
1113
1114
  	 * Syscall tracing and stepping should be turned off in the
  	 * child regardless of CLONE_PTRACE.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1115
  	 */
6580807da   Oleg Nesterov   ptrace: copy_proc...
1116
  	user_disable_single_step(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1117
  	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
ed75e8d58   Laurent Vivier   [PATCH] UML Suppo...
1118
1119
1120
  #ifdef TIF_SYSCALL_EMU
  	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
  #endif
9745512ce   Arjan van de Ven   sched: latencytop...
1121
  	clear_all_latency_tracing(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1122

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
1124
1125
1126
  	/* ok, now we should be set up.. */
  	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
  	p->pdeath_signal = 0;
  	p->exit_state = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1127
1128
1129
1130
1131
  	/*
  	 * Ok, make it visible to the rest of the system.
  	 * We dont wake it up yet.
  	 */
  	p->group_leader = p;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1132
  	INIT_LIST_HEAD(&p->thread_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1133

b4f48b636   Paul Menage   Task Control Grou...
1134
1135
1136
1137
1138
  	/* Now that the task is set up, run cgroup callbacks if
  	 * necessary. We need to run them before the task is visible
  	 * on the tasklist. */
  	cgroup_fork_callbacks(p);
  	cgroup_callbacks_done = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1139
1140
  	/* Need tasklist lock for parent etc handling! */
  	write_lock_irq(&tasklist_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1141
  	/* CLONE_PARENT re-uses the old parent */
2d5516cbb   Oleg Nesterov   copy_process: fix...
1142
  	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1143
  		p->real_parent = current->real_parent;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1144
1145
  		p->parent_exec_id = current->parent_exec_id;
  	} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1146
  		p->real_parent = current;
2d5516cbb   Oleg Nesterov   copy_process: fix...
1147
1148
  		p->parent_exec_id = current->self_exec_id;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149

3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1150
  	spin_lock(&current->sighand->siglock);
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1151
1152
1153
1154
1155
1156
1157
1158
1159
  
  	/*
  	 * Process group and session signals need to be delivered to just the
  	 * parent before the fork or both the parent and the child after the
  	 * fork. Restart if a signal comes in before we add the new process to
  	 * it's process group.
  	 * A fatal signal pending means that current will exit, so the new
  	 * thread can't slip out of an OOM kill (or normal SIGKILL).
   	 */
23ff44402   Daniel Walker   whitespace fixes:...
1160
  	recalc_sigpending();
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1161
1162
1163
1164
  	if (signal_pending(current)) {
  		spin_unlock(&current->sighand->siglock);
  		write_unlock_irq(&tasklist_lock);
  		retval = -ERESTARTNOINTR;
f7e8b616e   Steven Rostedt   function-graph: m...
1165
  		goto bad_fork_free_pid;
4a2c7a783   Oleg Nesterov   [PATCH] make fork...
1166
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1167
  	if (clone_flags & CLONE_THREAD) {
4ab6c0833   Oleg Nesterov   clone(): fix race...
1168
1169
  		atomic_inc(&current->signal->count);
  		atomic_inc(&current->signal->live);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1170
  		p->group_leader = current->group_leader;
47e65328a   Oleg Nesterov   [PATCH] pids: kil...
1171
  		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1172
  	}
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1173
  	if (likely(p->pid)) {
09a05394f   Roland McGrath   tracehook: clone
1174
  		tracehook_finish_clone(p, clone_flags, trace);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1175
1176
  
  		if (thread_group_leader(p)) {
5cd17569f   Eric W. Biederman   fix clone(CLONE_N...
1177
  			if (clone_flags & CLONE_NEWPID)
30e49c263   Pavel Emelyanov   pid namespaces: a...
1178
  				p->nsproxy->pid_ns->child_reaper = p;
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1179

fea9d1755   Oleg Nesterov   ITIMER_REAL: conv...
1180
  			p->signal->leader_pid = pid;
9c9f4ded9   Alan Cox   tty: Add a kref c...
1181
1182
  			tty_kref_put(p->signal->tty);
  			p->signal->tty = tty_kref_get(current->signal->tty);
5cd17569f   Eric W. Biederman   fix clone(CLONE_N...
1183
1184
  			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
  			attach_pid(p, PIDTYPE_SID, task_session(current));
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
1185
  			list_add_tail(&p->sibling, &p->real_parent->children);
5e85d4abe   Eric W. Biederman   [PATCH] task: Mak...
1186
  			list_add_tail_rcu(&p->tasks, &init_task.tasks);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1187
  			__get_cpu_var(process_counts)++;
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1188
  		}
85868995d   Sukadev Bhattiprolu   Use struct pid pa...
1189
  		attach_pid(p, PIDTYPE_PID, pid);
73b9ebfe1   Oleg Nesterov   [PATCH] pidhash: ...
1190
  		nr_threads++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1191
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1192
  	total_forks++;
3f17da699   Oleg Nesterov   [PATCH] fix kill_...
1193
  	spin_unlock(&current->sighand->siglock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1194
  	write_unlock_irq(&tasklist_lock);
c13cf856c   Andrew Morton   [PATCH] fork.c: p...
1195
  	proc_fork_connector(p);
817929ec2   Paul Menage   Task Control Grou...
1196
  	cgroup_post_fork(p);
cdd6c482c   Ingo Molnar   perf: Do the big ...
1197
  	perf_event_fork(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1198
  	return p;
425fb2b4b   Pavel Emelyanov   pid namespaces: m...
1199
1200
1201
  bad_fork_free_pid:
  	if (pid != &init_struct_pid)
  		free_pid(pid);
fd0928df9   Jens Axboe   ioprio: move io p...
1202
  bad_fork_cleanup_io:
b69f22920   Louis Rilling   block: Fix io_con...
1203
1204
  	if (p->io_context)
  		exit_io_context(p);
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1205
  bad_fork_cleanup_namespaces:
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1206
  	exit_task_namespaces(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1207
1208
1209
1210
  bad_fork_cleanup_mm:
  	if (p->mm)
  		mmput(p->mm);
  bad_fork_cleanup_signal:
4ab6c0833   Oleg Nesterov   clone(): fix race...
1211
1212
  	if (!(clone_flags & CLONE_THREAD))
  		__cleanup_signal(p->signal);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1213
  bad_fork_cleanup_sighand:
a7e5328a0   Oleg Nesterov   [PATCH] cleanup _...
1214
  	__cleanup_sighand(p->sighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1215
1216
1217
1218
1219
1220
1221
1222
  bad_fork_cleanup_fs:
  	exit_fs(p); /* blocking */
  bad_fork_cleanup_files:
  	exit_files(p); /* blocking */
  bad_fork_cleanup_semundo:
  	exit_sem(p);
  bad_fork_cleanup_audit:
  	audit_free(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1223
  bad_fork_cleanup_policy:
cdd6c482c   Ingo Molnar   perf: Do the big ...
1224
  	perf_event_free_task(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1225
  #ifdef CONFIG_NUMA
f0be3d32b   Lee Schermerhorn   mempolicy: rename...
1226
  	mpol_put(p->mempolicy);
b4f48b636   Paul Menage   Task Control Grou...
1227
  bad_fork_cleanup_cgroup:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1228
  #endif
b4f48b636   Paul Menage   Task Control Grou...
1229
  	cgroup_exit(p, cgroup_callbacks_done);
35df17c57   Shailabh Nagar   [PATCH] task dela...
1230
  	delayacct_tsk_free(p);
a1261f546   Al Viro   [PATCH] m68k: int...
1231
  	module_put(task_thread_info(p)->exec_domain->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1232
  bad_fork_cleanup_count:
d84f4f992   David Howells   CRED: Inaugurate ...
1233
  	atomic_dec(&p->cred->user->processes);
e0e817392   David Howells   CRED: Add some co...
1234
  	exit_creds(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1235
1236
  bad_fork_free:
  	free_task(p);
fe7d37d1f   Oleg Nesterov   [PATCH] copy_proc...
1237
1238
  fork_out:
  	return ERR_PTR(retval);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1239
  }
6b2fb3c65   Adrian Bunk   idle_regs() must ...
1240
  /*
   * Default register setup for an idle task: clear the caller-supplied
   * pt_regs and hand the same pointer back.  The symbol is declared weak,
   * so another definition can replace it at link time.
   */
  noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
  {
  	memset(regs, 0, sizeof(*regs));
  	return regs;
  }
9abcf40b1   Al Viro   [PATCH] fork_idle...
1245
  /*
   * Create a task via copy_process() with CLONE_VM, registers prepared by
   * idle_regs() and init_struct_pid as its pid, then pass it to init_idle()
   * for @cpu.
   *
   * Returns whatever copy_process() returned; when that is an ERR_PTR value
   * init_idle() is not called and the error pointer is returned unchanged.
   */
  struct task_struct * __cpuinit fork_idle(int cpu)
  {
  	struct pt_regs regs;
  	struct task_struct *idle;

  	idle = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
  			    &init_struct_pid, 0);
  	if (IS_ERR(idle))
  		return idle;

  	init_idle(idle, cpu);
  	return idle;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
  /*
   *  Ok, this is the main fork-routine.
   *
   * It copies the process, and if successful kick-starts
   * it and waits for it to finish using the VM if required.
   *
   * @clone_flags:   CLONE_* flags selecting what is shared and how to start
   * @stack_start:   passed through to copy_process()
   * @regs:          register state; also decides whether user tracing applies
   * @stack_size:    passed through to copy_process()
   * @parent_tidptr: user pointer that receives the new id when
   *                 CLONE_PARENT_SETTID is set
   * @child_tidptr:  passed through to copy_process()
   *
   * Returns task_pid_vnr() of the new task on success.  On failure returns a
   * negative errno: -EINVAL for CLONE_NEWUSER together with CLONE_THREAD,
   * -EPERM for CLONE_NEWUSER without CAP_SYS_ADMIN, CAP_SETUID and
   * CAP_SETGID, otherwise the error encoded in copy_process()'s result.
   */
  long do_fork(unsigned long clone_flags,
  	      unsigned long stack_start,
  	      struct pt_regs *regs,
  	      unsigned long stack_size,
  	      int __user *parent_tidptr,
  	      int __user *child_tidptr)
  {
  	struct task_struct *p;
  	int trace = 0;
  	long nr;

  	/*
  	 * Do some preliminary argument and permissions checking before we
  	 * actually start allocating stuff
  	 */
  	if (clone_flags & CLONE_NEWUSER) {
  		if (clone_flags & CLONE_THREAD)
  			return -EINVAL;
  		/* hopefully this check will go away when userns support is
  		 * complete
  		 */
  		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
  				!capable(CAP_SETGID))
  			return -EPERM;
  	}

  	/*
  	 * We hope to recycle these flags after 2.6.26
  	 */
  	if (unlikely(clone_flags & CLONE_STOPPED)) {
  		/* Emit the deprecation notice at most 100 times in total. */
  		static int __read_mostly count = 100;

  		if (count > 0 && printk_ratelimit()) {
  			char comm[TASK_COMM_LEN];

  			count--;
  			printk(KERN_INFO "fork(): process `%s' used deprecated "
  					"clone flags 0x%lx\n",
  				get_task_comm(comm, current),
  				clone_flags & CLONE_STOPPED);
  		}
  	}

  	/*
  	 * When called from kernel_thread, don't do user tracing stuff.
  	 */
  	if (likely(user_mode(regs)))
  		trace = tracehook_prepare_clone(clone_flags);

  	p = copy_process(clone_flags, stack_start, regs, stack_size,
  			 child_tidptr, NULL, trace);
  	/*
  	 * Do this prior waking up the new thread - the thread pointer
  	 * might get invalid after that point, if the thread exits quickly.
  	 */
  	if (!IS_ERR(p)) {
  		struct completion vfork;

  		trace_sched_process_fork(current, p);

  		nr = task_pid_vnr(p);

  		/* The result of put_user() is not checked here. */
  		if (clone_flags & CLONE_PARENT_SETTID)
  			put_user(nr, parent_tidptr);

  		/* Hook up the completion before the child is allowed to run. */
  		if (clone_flags & CLONE_VFORK) {
  			p->vfork_done = &vfork;
  			init_completion(&vfork);
  		}

  		audit_finish_fork(p);
  		tracehook_report_clone(regs, clone_flags, nr, p);

  		/*
  		 * We set PF_STARTING at creation in case tracing wants to
  		 * use this to distinguish a fully live task from one that
  		 * hasn't gotten to tracehook_report_clone() yet.  Now we
  		 * clear it and set the child going.
  		 */
  		p->flags &= ~PF_STARTING;

  		if (unlikely(clone_flags & CLONE_STOPPED)) {
  			/*
  			 * We'll start up with an immediate SIGSTOP.
  			 */
  			sigaddset(&p->pending.signal, SIGSTOP);
  			set_tsk_thread_flag(p, TIF_SIGPENDING);
  			__set_task_state(p, TASK_STOPPED);
  		} else {
  			wake_up_new_task(p, clone_flags);
  		}

  		tracehook_report_clone_complete(trace, regs,
  						clone_flags, nr, p);

  		/* vfork: block until the completion set up above is signalled. */
  		if (clone_flags & CLONE_VFORK) {
  			freezer_do_not_count();
  			wait_for_completion(&vfork);
  			freezer_count();
  			tracehook_report_vfork_done(p, nr);
  		}
  	} else {
  		nr = PTR_ERR(p);
  	}
  	return nr;
  }
5fd63b308   Ravikiran G Thirumalai   [PATCH] x86_64: I...
1363
1364
1365
  /*
   * Fall back to 0 when no earlier definition is in effect.
   * proc_caches_init() passes this as the alignment argument when it
   * creates the mm_struct cache.
   */
  #if !defined(ARCH_MIN_MMSTRUCT_ALIGN)
  #define ARCH_MIN_MMSTRUCT_ALIGN 0
  #endif
51cc50685   Alexey Dobriyan   SL*B: drop kmem c...
1366
  static void sighand_ctor(void *data)
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1367
1368
  {
  	struct sighand_struct *sighand = data;
a35afb830   Christoph Lameter   Remove SLAB_CTOR_...
1369
  	spin_lock_init(&sighand->siglock);
b8fceee17   Davide Libenzi   signalfd simplifi...
1370
  	init_waitqueue_head(&sighand->signalfd_wqh);
aa1757f90   Oleg Nesterov   [PATCH] convert s...
1371
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1372
1373
1374
1375
  /*
   * Create the slab caches used in this file for sighand, signal, files,
   * fs, mm and vm_area structures, then call mmap_init().  Every cache is
   * created with SLAB_PANIC.
   */
  void __init proc_caches_init(void)
  {
  	/* Flag set shared by every cache except vm_area_struct. */
  	const unsigned long common =
  		SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_NOTRACK;

  	/* The sighand cache additionally uses SLAB_DESTROY_BY_RCU and a ctor. */
  	sighand_cachep = kmem_cache_create("sighand_cache",
  					   sizeof(struct sighand_struct), 0,
  					   common | SLAB_DESTROY_BY_RCU,
  					   sighand_ctor);
  	signal_cachep = kmem_cache_create("signal_cache",
  					  sizeof(struct signal_struct), 0,
  					  common, NULL);
  	files_cachep = kmem_cache_create("files_cache",
  					 sizeof(struct files_struct), 0,
  					 common, NULL);
  	fs_cachep = kmem_cache_create("fs_cache",
  				      sizeof(struct fs_struct), 0,
  				      common, NULL);
  	mm_cachep = kmem_cache_create("mm_struct",
  				      sizeof(struct mm_struct),
  				      ARCH_MIN_MMSTRUCT_ALIGN,
  				      common, NULL);
  	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
  	mmap_init();
  }
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1393

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1394
1395
1396
1397
  /*
   * Check constraints on flags passed to the unshare system call and
   * force unsharing of additional process context as appropriate.
   */
a39bc5169   Alexey Dobriyan   Uninline fork.c/e...
1398
  static void check_unshare_flags(unsigned long *flags_ptr)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
  {
  	/*
  	 * If unsharing a thread from a thread group, must also
  	 * unshare vm.
  	 */
  	if (*flags_ptr & CLONE_THREAD)
  		*flags_ptr |= CLONE_VM;
  
  	/*
  	 * If unsharing vm, must also unshare signal handlers.
  	 */
  	if (*flags_ptr & CLONE_VM)
  		*flags_ptr |= CLONE_SIGHAND;
  
  	/*
  	 * If unsharing signal handlers and the task was created
  	 * using CLONE_THREAD, then must unshare the thread
  	 */
  	if ((*flags_ptr & CLONE_SIGHAND) &&
  	    (atomic_read(&current->signal->count) > 1))
  		*flags_ptr |= CLONE_THREAD;
  
  	/*
  	 * If unsharing namespace, must also unshare filesystem information.
  	 */
  	if (*flags_ptr & CLONE_NEWNS)
  		*flags_ptr |= CLONE_FS;
  }
  
  /*
   * Tasks created with CLONE_THREAD cannot be unshared yet: any request
   * carrying CLONE_THREAD is rejected with -EINVAL, everything else
   * yields 0.
   */
  static int unshare_thread(unsigned long unshare_flags)
  {
  	return (unshare_flags & CLONE_THREAD) ? -EINVAL : 0;
  }
  
  /*
99d1419d9   JANAK DESAI   [PATCH] unshare s...
1440
   * Unshare the filesystem structure if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1441
1442
1443
1444
   */
  static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  {
  	struct fs_struct *fs = current->fs;
498052bba   Al Viro   New locking/refco...
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
  	if (!(unshare_flags & CLONE_FS) || !fs)
  		return 0;
  
  	/* don't need lock here; in the worst case we'll do useless copy */
  	if (fs->users == 1)
  		return 0;
  
  	*new_fsp = copy_fs_struct(fs);
  	if (!*new_fsp)
  		return -ENOMEM;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1455
1456
1457
1458
1459
  
  	return 0;
  }
  
  /*
dae3c5a0b   Oleg Nesterov   [PATCH] sys_unsha...
1460
   * Unsharing of sighand is not supported yet
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1461
1462
1463
1464
   */
  static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
  {
  	struct sighand_struct *sigh = current->sighand;
dae3c5a0b   Oleg Nesterov   [PATCH] sys_unsha...
1465
  	if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1466
1467
1468
1469
1470
1471
  		return -EINVAL;
  	else
  		return 0;
  }
  
  /*
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1472
   * Unshare vm if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1473
1474
1475
1476
1477
1478
   */
  static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
  {
  	struct mm_struct *mm = current->mm;
  
  	if ((unshare_flags & CLONE_VM) &&
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1479
  	    (mm && atomic_read(&mm->mm_users) > 1)) {
2d61b8677   Oleg Nesterov   [PATCH] disable u...
1480
  		return -EINVAL;
a0a7ec308   JANAK DESAI   [PATCH] unshare s...
1481
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1482
1483
  
  	return 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1484
1485
1486
  }
  
  /*
a016f3389   JANAK DESAI   [PATCH] unshare s...
1487
   * Unshare file descriptor table if it is being shared
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1488
1489
1490
1491
   */
  static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
  {
  	struct files_struct *fd = current->files;
a016f3389   JANAK DESAI   [PATCH] unshare s...
1492
  	int error = 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1493
1494
  
  	if ((unshare_flags & CLONE_FILES) &&
a016f3389   JANAK DESAI   [PATCH] unshare s...
1495
1496
1497
1498
1499
  	    (fd && atomic_read(&fd->count) > 1)) {
  		*new_fdp = dup_fd(fd, &error);
  		if (!*new_fdp)
  			return error;
  	}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1500
1501
1502
1503
1504
  
  	return 0;
  }
  
  /*
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1505
1506
1507
1508
1509
1510
1511
   * unshare allows a process to 'unshare' part of the process
   * context which was originally shared using clone.  copy_*
   * functions used by do_fork() cannot be used here directly
   * because they modify an inactive task_struct that is being
   * constructed. Here we are modifying the current, active,
   * task_struct.
   */
6559eed8c   Heiko Carstens   [CVE-2009-0029] S...
1512
  SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1513
1514
1515
  {
  	int err = 0;
  	struct fs_struct *fs, *new_fs = NULL;
dae3c5a0b   Oleg Nesterov   [PATCH] sys_unsha...
1516
  	struct sighand_struct *new_sigh = NULL;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1517
1518
  	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
  	struct files_struct *fd, *new_fd = NULL;
cf7b708c8   Pavel Emelyanov   Make access to ta...
1519
  	struct nsproxy *new_nsproxy = NULL;
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1520
  	int do_sysvsem = 0;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1521
1522
  
  	check_unshare_flags(&unshare_flags);
06f9d4f94   Eric W. Biederman   [PATCH] unshare: ...
1523
1524
1525
  	/* Return -EINVAL for all unsupported flags */
  	err = -EINVAL;
  	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
25b21cb2f   Kirill Korotaev   [PATCH] IPC names...
1526
  				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
18b6e0414   Serge Hallyn   User namespaces: ...
1527
  				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
06f9d4f94   Eric W. Biederman   [PATCH] unshare: ...
1528
  		goto bad_unshare_out;
6013f67fc   Manfred Spraul   ipc: sysvsem: for...
1529
1530
1531
1532
1533
1534
  	/*
  	 * CLONE_NEWIPC must also detach from the undolist: after switching
  	 * to a new ipc namespace, the semaphore arrays from the old
  	 * namespace are unreachable.
  	 */
  	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1535
  		do_sysvsem = 1;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1536
1537
1538
1539
  	if ((err = unshare_thread(unshare_flags)))
  		goto bad_unshare_out;
  	if ((err = unshare_fs(unshare_flags, &new_fs)))
  		goto bad_unshare_cleanup_thread;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1540
  	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1541
  		goto bad_unshare_cleanup_fs;
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1542
1543
1544
1545
  	if ((err = unshare_vm(unshare_flags, &new_mm)))
  		goto bad_unshare_cleanup_sigh;
  	if ((err = unshare_fd(unshare_flags, &new_fd)))
  		goto bad_unshare_cleanup_vm;
e3222c4ec   Badari Pulavarty   Merge sys_clone()...
1546
1547
  	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
  			new_fs)))
9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1548
  		goto bad_unshare_cleanup_fd;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1549

9edff4ab1   Manfred Spraul   ipc: sysvsem: imp...
1550
1551
1552
1553
1554
1555
1556
  	if (new_fs ||  new_mm || new_fd || do_sysvsem || new_nsproxy) {
  		if (do_sysvsem) {
  			/*
  			 * CLONE_SYSVSEM is equivalent to sys_exit().
  			 */
  			exit_sem(current);
  		}
ab516013a   Serge E. Hallyn   [PATCH] namespace...
1557

c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1558
  		if (new_nsproxy) {
cf7b708c8   Pavel Emelyanov   Make access to ta...
1559
1560
  			switch_task_namespaces(current, new_nsproxy);
  			new_nsproxy = NULL;
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1561
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1562

cf7b708c8   Pavel Emelyanov   Make access to ta...
1563
  		task_lock(current);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1564
1565
  		if (new_fs) {
  			fs = current->fs;
498052bba   Al Viro   New locking/refco...
1566
  			write_lock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1567
  			current->fs = new_fs;
498052bba   Al Viro   New locking/refco...
1568
1569
1570
1571
1572
  			if (--fs->users)
  				new_fs = NULL;
  			else
  				new_fs = fs;
  			write_unlock(&fs->lock);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1573
  		}
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
  		if (new_mm) {
  			mm = current->mm;
  			active_mm = current->active_mm;
  			current->mm = new_mm;
  			current->active_mm = new_mm;
  			activate_mm(active_mm, new_mm);
  			new_mm = mm;
  		}
  
  		if (new_fd) {
  			fd = current->files;
  			current->files = new_fd;
  			new_fd = fd;
  		}
  
  		task_unlock(current);
  	}
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1591
  	if (new_nsproxy)
444f378b2   Linus Torvalds   Revert "[PATCH] n...
1592
  		put_nsproxy(new_nsproxy);
c0b2fc316   Serge Hallyn   [PATCH] uts: copy...
1593

cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
  bad_unshare_cleanup_fd:
  	if (new_fd)
  		put_files_struct(new_fd);
  
  bad_unshare_cleanup_vm:
  	if (new_mm)
  		mmput(new_mm);
  
  bad_unshare_cleanup_sigh:
  	if (new_sigh)
  		if (atomic_dec_and_test(&new_sigh->count))
  			kmem_cache_free(sighand_cachep, new_sigh);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1606
1607
  bad_unshare_cleanup_fs:
  	if (new_fs)
498052bba   Al Viro   New locking/refco...
1608
  		free_fs_struct(new_fs);
cf2e340f4   JANAK DESAI   [PATCH] unshare s...
1609
1610
1611
1612
1613
  
  bad_unshare_cleanup_thread:
  bad_unshare_out:
  	return err;
  }
3b1253880   Al Viro   [PATCH] sanitize ...
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
  
  /*
   *	Helper to unshare the files of the current task.
   *	We don't want to expose copy_files internals to
   *	the exec layer of the kernel.
   *
   *	If unshare_fd() made a copy, it is installed as the task's files
   *	under task_lock() and the previous table is handed back through
   *	@displaced; 0 is returned.  If it failed or made no copy,
   *	*displaced is set to NULL and unshare_fd()'s result (0 when there
   *	was nothing to unshare) is returned.
   */

  int unshare_files(struct files_struct **displaced)
  {
  	struct task_struct *tsk = current;
  	struct files_struct *fresh = NULL;
  	int err = unshare_fd(CLONE_FILES, &fresh);

  	if (err || !fresh) {
  		*displaced = NULL;
  		return err;
  	}

  	*displaced = tsk->files;

  	task_lock(tsk);
  	tsk->files = fresh;
  	task_unlock(tsk);

  	return 0;
  }