Commit 9bfb23fc4a481650e60d22dbe84c0fd5a9d49bba

Authored by Oleg Nesterov
Committed by Linus Torvalds
1 parent 4d51985e48

sys_unshare: remove the dead CLONE_THREAD/SIGHAND/VM code

Cleanup: kill the dead code which does nothing but complicate the code
and confuse the reader.

sys_unshare(CLONE_THREAD/SIGHAND/VM) is not really implemented, and I
doubt very much it will ever work.  At least, nobody even tried since the
original 99d1419d96d7df9cfa56 ("unshare system call -v5: system call
handler function") was applied more than 4 years ago.

And the code is not consistent.  unshare_thread() always fails
unconditionally, while unshare_sighand() and unshare_vm() pretend to work
if there is nothing to unshare.

Remove the unshare_thread(), unshare_sighand() and unshare_vm() helpers and
the related variables, and add a simple CLONE_THREAD | CLONE_SIGHAND | CLONE_VM
check to check_unshare_flags().

Also, move the "CLONE_NEWNS needs CLONE_FS" check from
check_unshare_flags() to sys_unshare().  This looks more consistent and
matches the similar do_sysvsem check in sys_unshare().

Note: with or without this patch "atomic_read(&mm->mm_users) > 1" can give
a false positive, because get_task_mm() temporarily increments mm_users.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Janak Desai <janak@us.ibm.com>
Cc: Daniel Lezcano <daniel.lezcano@free.fr>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 25 additions and 98 deletions Inline Diff

1 /* 1 /*
2 * linux/kernel/fork.c 2 * linux/kernel/fork.c
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */ 5 */
6 6
7 /* 7 /*
8 * 'fork.c' contains the help-routines for the 'fork' system call 8 * 'fork.c' contains the help-routines for the 'fork' system call
9 * (see also entry.S and others). 9 * (see also entry.S and others).
10 * Fork is rather simple, once you get the hang of it, but the memory 10 * Fork is rather simple, once you get the hang of it, but the memory
11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' 11 * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
12 */ 12 */
13 13
14 #include <linux/slab.h> 14 #include <linux/slab.h>
15 #include <linux/init.h> 15 #include <linux/init.h>
16 #include <linux/unistd.h> 16 #include <linux/unistd.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/vmalloc.h> 18 #include <linux/vmalloc.h>
19 #include <linux/completion.h> 19 #include <linux/completion.h>
20 #include <linux/personality.h> 20 #include <linux/personality.h>
21 #include <linux/mempolicy.h> 21 #include <linux/mempolicy.h>
22 #include <linux/sem.h> 22 #include <linux/sem.h>
23 #include <linux/file.h> 23 #include <linux/file.h>
24 #include <linux/fdtable.h> 24 #include <linux/fdtable.h>
25 #include <linux/iocontext.h> 25 #include <linux/iocontext.h>
26 #include <linux/key.h> 26 #include <linux/key.h>
27 #include <linux/binfmts.h> 27 #include <linux/binfmts.h>
28 #include <linux/mman.h> 28 #include <linux/mman.h>
29 #include <linux/mmu_notifier.h> 29 #include <linux/mmu_notifier.h>
30 #include <linux/fs.h> 30 #include <linux/fs.h>
31 #include <linux/nsproxy.h> 31 #include <linux/nsproxy.h>
32 #include <linux/capability.h> 32 #include <linux/capability.h>
33 #include <linux/cpu.h> 33 #include <linux/cpu.h>
34 #include <linux/cgroup.h> 34 #include <linux/cgroup.h>
35 #include <linux/security.h> 35 #include <linux/security.h>
36 #include <linux/hugetlb.h> 36 #include <linux/hugetlb.h>
37 #include <linux/swap.h> 37 #include <linux/swap.h>
38 #include <linux/syscalls.h> 38 #include <linux/syscalls.h>
39 #include <linux/jiffies.h> 39 #include <linux/jiffies.h>
40 #include <linux/tracehook.h> 40 #include <linux/tracehook.h>
41 #include <linux/futex.h> 41 #include <linux/futex.h>
42 #include <linux/compat.h> 42 #include <linux/compat.h>
43 #include <linux/kthread.h> 43 #include <linux/kthread.h>
44 #include <linux/task_io_accounting_ops.h> 44 #include <linux/task_io_accounting_ops.h>
45 #include <linux/rcupdate.h> 45 #include <linux/rcupdate.h>
46 #include <linux/ptrace.h> 46 #include <linux/ptrace.h>
47 #include <linux/mount.h> 47 #include <linux/mount.h>
48 #include <linux/audit.h> 48 #include <linux/audit.h>
49 #include <linux/memcontrol.h> 49 #include <linux/memcontrol.h>
50 #include <linux/ftrace.h> 50 #include <linux/ftrace.h>
51 #include <linux/profile.h> 51 #include <linux/profile.h>
52 #include <linux/rmap.h> 52 #include <linux/rmap.h>
53 #include <linux/ksm.h> 53 #include <linux/ksm.h>
54 #include <linux/acct.h> 54 #include <linux/acct.h>
55 #include <linux/tsacct_kern.h> 55 #include <linux/tsacct_kern.h>
56 #include <linux/cn_proc.h> 56 #include <linux/cn_proc.h>
57 #include <linux/freezer.h> 57 #include <linux/freezer.h>
58 #include <linux/delayacct.h> 58 #include <linux/delayacct.h>
59 #include <linux/taskstats_kern.h> 59 #include <linux/taskstats_kern.h>
60 #include <linux/random.h> 60 #include <linux/random.h>
61 #include <linux/tty.h> 61 #include <linux/tty.h>
62 #include <linux/proc_fs.h> 62 #include <linux/proc_fs.h>
63 #include <linux/blkdev.h> 63 #include <linux/blkdev.h>
64 #include <linux/fs_struct.h> 64 #include <linux/fs_struct.h>
65 #include <linux/magic.h> 65 #include <linux/magic.h>
66 #include <linux/perf_event.h> 66 #include <linux/perf_event.h>
67 #include <linux/posix-timers.h> 67 #include <linux/posix-timers.h>
68 #include <linux/user-return-notifier.h> 68 #include <linux/user-return-notifier.h>
69 #include <linux/oom.h> 69 #include <linux/oom.h>
70 #include <linux/khugepaged.h> 70 #include <linux/khugepaged.h>
71 71
72 #include <asm/pgtable.h> 72 #include <asm/pgtable.h>
73 #include <asm/pgalloc.h> 73 #include <asm/pgalloc.h>
74 #include <asm/uaccess.h> 74 #include <asm/uaccess.h>
75 #include <asm/mmu_context.h> 75 #include <asm/mmu_context.h>
76 #include <asm/cacheflush.h> 76 #include <asm/cacheflush.h>
77 #include <asm/tlbflush.h> 77 #include <asm/tlbflush.h>
78 78
79 #include <trace/events/sched.h> 79 #include <trace/events/sched.h>
80 80
81 /* 81 /*
82 * Protected counters by write_lock_irq(&tasklist_lock) 82 * Protected counters by write_lock_irq(&tasklist_lock)
83 */ 83 */
84 unsigned long total_forks; /* Handle normal Linux uptimes. */ 84 unsigned long total_forks; /* Handle normal Linux uptimes. */
85 int nr_threads; /* The idle threads do not count.. */ 85 int nr_threads; /* The idle threads do not count.. */
86 86
87 int max_threads; /* tunable limit on nr_threads */ 87 int max_threads; /* tunable limit on nr_threads */
88 88
89 DEFINE_PER_CPU(unsigned long, process_counts) = 0; 89 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
90 90
91 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 91 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
92 92
#ifdef CONFIG_PROVE_RCU
/*
 * Return the result of lockdep_is_held() for tasklist_lock.
 */
int lockdep_tasklist_lock_is_held(void)
{
	int held = lockdep_is_held(&tasklist_lock);

	return held;
}
EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
#endif /* CONFIG_PROVE_RCU */
100 100
101 int nr_processes(void) 101 int nr_processes(void)
102 { 102 {
103 int cpu; 103 int cpu;
104 int total = 0; 104 int total = 0;
105 105
106 for_each_possible_cpu(cpu) 106 for_each_possible_cpu(cpu)
107 total += per_cpu(process_counts, cpu); 107 total += per_cpu(process_counts, cpu);
108 108
109 return total; 109 return total;
110 } 110 }
111 111
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
/*
 * Generic task_struct allocator, used unless the architecture provides its
 * own: task_structs come from (and go back to) the task_struct_cachep slab.
 */
static struct kmem_cache *task_struct_cachep;
# define alloc_task_struct_node(node)		\
		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
# define free_task_struct(tsk)			\
		kmem_cache_free(task_struct_cachep, (tsk))
#endif
119 119
120 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR 120 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
121 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, 121 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
122 int node) 122 int node)
123 { 123 {
124 #ifdef CONFIG_DEBUG_STACK_USAGE 124 #ifdef CONFIG_DEBUG_STACK_USAGE
125 gfp_t mask = GFP_KERNEL | __GFP_ZERO; 125 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
126 #else 126 #else
127 gfp_t mask = GFP_KERNEL; 127 gfp_t mask = GFP_KERNEL;
128 #endif 128 #endif
129 struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); 129 struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
130 130
131 return page ? page_address(page) : NULL; 131 return page ? page_address(page) : NULL;
132 } 132 }
133 133
134 static inline void free_thread_info(struct thread_info *ti) 134 static inline void free_thread_info(struct thread_info *ti)
135 { 135 {
136 free_pages((unsigned long)ti, THREAD_SIZE_ORDER); 136 free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
137 } 137 }
138 #endif 138 #endif
139 139
/* Slab cache for signal_struct objects (tsk->signal) */
static struct kmem_cache *signal_cachep;

/* Slab cache for sighand_struct objects (tsk->sighand) */
struct kmem_cache *sighand_cachep;

/* Slab cache for files_struct objects (tsk->files) */
struct kmem_cache *files_cachep;

/* Slab cache for fs_struct objects (tsk->fs) */
struct kmem_cache *fs_cachep;

/* Slab cache for vm_area_struct objects */
struct kmem_cache *vm_area_cachep;

/* Slab cache for mm_struct objects (tsk->mm) */
static struct kmem_cache *mm_cachep;
157 157
158 static void account_kernel_stack(struct thread_info *ti, int account) 158 static void account_kernel_stack(struct thread_info *ti, int account)
159 { 159 {
160 struct zone *zone = page_zone(virt_to_page(ti)); 160 struct zone *zone = page_zone(virt_to_page(ti));
161 161
162 mod_zone_page_state(zone, NR_KERNEL_STACK, account); 162 mod_zone_page_state(zone, NR_KERNEL_STACK, account);
163 } 163 }
164 164
165 void free_task(struct task_struct *tsk) 165 void free_task(struct task_struct *tsk)
166 { 166 {
167 prop_local_destroy_single(&tsk->dirties); 167 prop_local_destroy_single(&tsk->dirties);
168 account_kernel_stack(tsk->stack, -1); 168 account_kernel_stack(tsk->stack, -1);
169 free_thread_info(tsk->stack); 169 free_thread_info(tsk->stack);
170 rt_mutex_debug_task_free(tsk); 170 rt_mutex_debug_task_free(tsk);
171 ftrace_graph_exit_task(tsk); 171 ftrace_graph_exit_task(tsk);
172 free_task_struct(tsk); 172 free_task_struct(tsk);
173 } 173 }
174 EXPORT_SYMBOL(free_task); 174 EXPORT_SYMBOL(free_task);
175 175
176 static inline void free_signal_struct(struct signal_struct *sig) 176 static inline void free_signal_struct(struct signal_struct *sig)
177 { 177 {
178 taskstats_tgid_free(sig); 178 taskstats_tgid_free(sig);
179 sched_autogroup_exit(sig); 179 sched_autogroup_exit(sig);
180 kmem_cache_free(signal_cachep, sig); 180 kmem_cache_free(signal_cachep, sig);
181 } 181 }
182 182
183 static inline void put_signal_struct(struct signal_struct *sig) 183 static inline void put_signal_struct(struct signal_struct *sig)
184 { 184 {
185 if (atomic_dec_and_test(&sig->sigcnt)) 185 if (atomic_dec_and_test(&sig->sigcnt))
186 free_signal_struct(sig); 186 free_signal_struct(sig);
187 } 187 }
188 188
189 void __put_task_struct(struct task_struct *tsk) 189 void __put_task_struct(struct task_struct *tsk)
190 { 190 {
191 WARN_ON(!tsk->exit_state); 191 WARN_ON(!tsk->exit_state);
192 WARN_ON(atomic_read(&tsk->usage)); 192 WARN_ON(atomic_read(&tsk->usage));
193 WARN_ON(tsk == current); 193 WARN_ON(tsk == current);
194 194
195 exit_creds(tsk); 195 exit_creds(tsk);
196 delayacct_tsk_free(tsk); 196 delayacct_tsk_free(tsk);
197 put_signal_struct(tsk->signal); 197 put_signal_struct(tsk->signal);
198 198
199 if (!profile_handoff_task(tsk)) 199 if (!profile_handoff_task(tsk))
200 free_task(tsk); 200 free_task(tsk);
201 } 201 }
202 EXPORT_SYMBOL_GPL(__put_task_struct); 202 EXPORT_SYMBOL_GPL(__put_task_struct);
203 203
/*
 * Architectures override this with a macro rather than a weak attribute
 * alias, to work around gcc 4.1.0 and 4.1.1 bugs with weak attributes and
 * empty functions.  The default expands to nothing.
 */
#ifndef arch_task_cache_init
#define arch_task_cache_init()
#endif
211 211
212 void __init fork_init(unsigned long mempages) 212 void __init fork_init(unsigned long mempages)
213 { 213 {
214 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 214 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
215 #ifndef ARCH_MIN_TASKALIGN 215 #ifndef ARCH_MIN_TASKALIGN
216 #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES 216 #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
217 #endif 217 #endif
218 /* create a slab on which task_structs can be allocated */ 218 /* create a slab on which task_structs can be allocated */
219 task_struct_cachep = 219 task_struct_cachep =
220 kmem_cache_create("task_struct", sizeof(struct task_struct), 220 kmem_cache_create("task_struct", sizeof(struct task_struct),
221 ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); 221 ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
222 #endif 222 #endif
223 223
224 /* do the arch specific task caches init */ 224 /* do the arch specific task caches init */
225 arch_task_cache_init(); 225 arch_task_cache_init();
226 226
227 /* 227 /*
228 * The default maximum number of threads is set to a safe 228 * The default maximum number of threads is set to a safe
229 * value: the thread structures can take up at most half 229 * value: the thread structures can take up at most half
230 * of memory. 230 * of memory.
231 */ 231 */
232 max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); 232 max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
233 233
234 /* 234 /*
235 * we need to allow at least 20 threads to boot a system 235 * we need to allow at least 20 threads to boot a system
236 */ 236 */
237 if(max_threads < 20) 237 if(max_threads < 20)
238 max_threads = 20; 238 max_threads = 20;
239 239
240 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; 240 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
241 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 241 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
242 init_task.signal->rlim[RLIMIT_SIGPENDING] = 242 init_task.signal->rlim[RLIMIT_SIGPENDING] =
243 init_task.signal->rlim[RLIMIT_NPROC]; 243 init_task.signal->rlim[RLIMIT_NPROC];
244 } 244 }
245 245
246 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, 246 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
247 struct task_struct *src) 247 struct task_struct *src)
248 { 248 {
249 *dst = *src; 249 *dst = *src;
250 return 0; 250 return 0;
251 } 251 }
252 252
253 static struct task_struct *dup_task_struct(struct task_struct *orig) 253 static struct task_struct *dup_task_struct(struct task_struct *orig)
254 { 254 {
255 struct task_struct *tsk; 255 struct task_struct *tsk;
256 struct thread_info *ti; 256 struct thread_info *ti;
257 unsigned long *stackend; 257 unsigned long *stackend;
258 int node = tsk_fork_get_node(orig); 258 int node = tsk_fork_get_node(orig);
259 int err; 259 int err;
260 260
261 prepare_to_copy(orig); 261 prepare_to_copy(orig);
262 262
263 tsk = alloc_task_struct_node(node); 263 tsk = alloc_task_struct_node(node);
264 if (!tsk) 264 if (!tsk)
265 return NULL; 265 return NULL;
266 266
267 ti = alloc_thread_info_node(tsk, node); 267 ti = alloc_thread_info_node(tsk, node);
268 if (!ti) { 268 if (!ti) {
269 free_task_struct(tsk); 269 free_task_struct(tsk);
270 return NULL; 270 return NULL;
271 } 271 }
272 272
273 err = arch_dup_task_struct(tsk, orig); 273 err = arch_dup_task_struct(tsk, orig);
274 if (err) 274 if (err)
275 goto out; 275 goto out;
276 276
277 tsk->stack = ti; 277 tsk->stack = ti;
278 278
279 err = prop_local_init_single(&tsk->dirties); 279 err = prop_local_init_single(&tsk->dirties);
280 if (err) 280 if (err)
281 goto out; 281 goto out;
282 282
283 setup_thread_stack(tsk, orig); 283 setup_thread_stack(tsk, orig);
284 clear_user_return_notifier(tsk); 284 clear_user_return_notifier(tsk);
285 clear_tsk_need_resched(tsk); 285 clear_tsk_need_resched(tsk);
286 stackend = end_of_stack(tsk); 286 stackend = end_of_stack(tsk);
287 *stackend = STACK_END_MAGIC; /* for overflow detection */ 287 *stackend = STACK_END_MAGIC; /* for overflow detection */
288 288
289 #ifdef CONFIG_CC_STACKPROTECTOR 289 #ifdef CONFIG_CC_STACKPROTECTOR
290 tsk->stack_canary = get_random_int(); 290 tsk->stack_canary = get_random_int();
291 #endif 291 #endif
292 292
293 /* One for us, one for whoever does the "release_task()" (usually parent) */ 293 /* One for us, one for whoever does the "release_task()" (usually parent) */
294 atomic_set(&tsk->usage,2); 294 atomic_set(&tsk->usage,2);
295 atomic_set(&tsk->fs_excl, 0); 295 atomic_set(&tsk->fs_excl, 0);
296 #ifdef CONFIG_BLK_DEV_IO_TRACE 296 #ifdef CONFIG_BLK_DEV_IO_TRACE
297 tsk->btrace_seq = 0; 297 tsk->btrace_seq = 0;
298 #endif 298 #endif
299 tsk->splice_pipe = NULL; 299 tsk->splice_pipe = NULL;
300 300
301 account_kernel_stack(ti, 1); 301 account_kernel_stack(ti, 1);
302 302
303 return tsk; 303 return tsk;
304 304
305 out: 305 out:
306 free_thread_info(ti); 306 free_thread_info(ti);
307 free_task_struct(tsk); 307 free_task_struct(tsk);
308 return NULL; 308 return NULL;
309 } 309 }
310 310
311 #ifdef CONFIG_MMU 311 #ifdef CONFIG_MMU
312 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 312 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
313 { 313 {
314 struct vm_area_struct *mpnt, *tmp, *prev, **pprev; 314 struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
315 struct rb_node **rb_link, *rb_parent; 315 struct rb_node **rb_link, *rb_parent;
316 int retval; 316 int retval;
317 unsigned long charge; 317 unsigned long charge;
318 struct mempolicy *pol; 318 struct mempolicy *pol;
319 319
320 down_write(&oldmm->mmap_sem); 320 down_write(&oldmm->mmap_sem);
321 flush_cache_dup_mm(oldmm); 321 flush_cache_dup_mm(oldmm);
322 /* 322 /*
323 * Not linked in yet - no deadlock potential: 323 * Not linked in yet - no deadlock potential:
324 */ 324 */
325 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); 325 down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
326 326
327 mm->locked_vm = 0; 327 mm->locked_vm = 0;
328 mm->mmap = NULL; 328 mm->mmap = NULL;
329 mm->mmap_cache = NULL; 329 mm->mmap_cache = NULL;
330 mm->free_area_cache = oldmm->mmap_base; 330 mm->free_area_cache = oldmm->mmap_base;
331 mm->cached_hole_size = ~0UL; 331 mm->cached_hole_size = ~0UL;
332 mm->map_count = 0; 332 mm->map_count = 0;
333 cpumask_clear(mm_cpumask(mm)); 333 cpumask_clear(mm_cpumask(mm));
334 mm->mm_rb = RB_ROOT; 334 mm->mm_rb = RB_ROOT;
335 rb_link = &mm->mm_rb.rb_node; 335 rb_link = &mm->mm_rb.rb_node;
336 rb_parent = NULL; 336 rb_parent = NULL;
337 pprev = &mm->mmap; 337 pprev = &mm->mmap;
338 retval = ksm_fork(mm, oldmm); 338 retval = ksm_fork(mm, oldmm);
339 if (retval) 339 if (retval)
340 goto out; 340 goto out;
341 retval = khugepaged_fork(mm, oldmm); 341 retval = khugepaged_fork(mm, oldmm);
342 if (retval) 342 if (retval)
343 goto out; 343 goto out;
344 344
345 prev = NULL; 345 prev = NULL;
346 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 346 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
347 struct file *file; 347 struct file *file;
348 348
349 if (mpnt->vm_flags & VM_DONTCOPY) { 349 if (mpnt->vm_flags & VM_DONTCOPY) {
350 long pages = vma_pages(mpnt); 350 long pages = vma_pages(mpnt);
351 mm->total_vm -= pages; 351 mm->total_vm -= pages;
352 vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, 352 vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
353 -pages); 353 -pages);
354 continue; 354 continue;
355 } 355 }
356 charge = 0; 356 charge = 0;
357 if (mpnt->vm_flags & VM_ACCOUNT) { 357 if (mpnt->vm_flags & VM_ACCOUNT) {
358 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; 358 unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
359 if (security_vm_enough_memory(len)) 359 if (security_vm_enough_memory(len))
360 goto fail_nomem; 360 goto fail_nomem;
361 charge = len; 361 charge = len;
362 } 362 }
363 tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 363 tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
364 if (!tmp) 364 if (!tmp)
365 goto fail_nomem; 365 goto fail_nomem;
366 *tmp = *mpnt; 366 *tmp = *mpnt;
367 INIT_LIST_HEAD(&tmp->anon_vma_chain); 367 INIT_LIST_HEAD(&tmp->anon_vma_chain);
368 pol = mpol_dup(vma_policy(mpnt)); 368 pol = mpol_dup(vma_policy(mpnt));
369 retval = PTR_ERR(pol); 369 retval = PTR_ERR(pol);
370 if (IS_ERR(pol)) 370 if (IS_ERR(pol))
371 goto fail_nomem_policy; 371 goto fail_nomem_policy;
372 vma_set_policy(tmp, pol); 372 vma_set_policy(tmp, pol);
373 tmp->vm_mm = mm; 373 tmp->vm_mm = mm;
374 if (anon_vma_fork(tmp, mpnt)) 374 if (anon_vma_fork(tmp, mpnt))
375 goto fail_nomem_anon_vma_fork; 375 goto fail_nomem_anon_vma_fork;
376 tmp->vm_flags &= ~VM_LOCKED; 376 tmp->vm_flags &= ~VM_LOCKED;
377 tmp->vm_next = tmp->vm_prev = NULL; 377 tmp->vm_next = tmp->vm_prev = NULL;
378 file = tmp->vm_file; 378 file = tmp->vm_file;
379 if (file) { 379 if (file) {
380 struct inode *inode = file->f_path.dentry->d_inode; 380 struct inode *inode = file->f_path.dentry->d_inode;
381 struct address_space *mapping = file->f_mapping; 381 struct address_space *mapping = file->f_mapping;
382 382
383 get_file(file); 383 get_file(file);
384 if (tmp->vm_flags & VM_DENYWRITE) 384 if (tmp->vm_flags & VM_DENYWRITE)
385 atomic_dec(&inode->i_writecount); 385 atomic_dec(&inode->i_writecount);
386 spin_lock(&mapping->i_mmap_lock); 386 spin_lock(&mapping->i_mmap_lock);
387 if (tmp->vm_flags & VM_SHARED) 387 if (tmp->vm_flags & VM_SHARED)
388 mapping->i_mmap_writable++; 388 mapping->i_mmap_writable++;
389 tmp->vm_truncate_count = mpnt->vm_truncate_count; 389 tmp->vm_truncate_count = mpnt->vm_truncate_count;
390 flush_dcache_mmap_lock(mapping); 390 flush_dcache_mmap_lock(mapping);
391 /* insert tmp into the share list, just after mpnt */ 391 /* insert tmp into the share list, just after mpnt */
392 vma_prio_tree_add(tmp, mpnt); 392 vma_prio_tree_add(tmp, mpnt);
393 flush_dcache_mmap_unlock(mapping); 393 flush_dcache_mmap_unlock(mapping);
394 spin_unlock(&mapping->i_mmap_lock); 394 spin_unlock(&mapping->i_mmap_lock);
395 } 395 }
396 396
397 /* 397 /*
398 * Clear hugetlb-related page reserves for children. This only 398 * Clear hugetlb-related page reserves for children. This only
399 * affects MAP_PRIVATE mappings. Faults generated by the child 399 * affects MAP_PRIVATE mappings. Faults generated by the child
400 * are not guaranteed to succeed, even if read-only 400 * are not guaranteed to succeed, even if read-only
401 */ 401 */
402 if (is_vm_hugetlb_page(tmp)) 402 if (is_vm_hugetlb_page(tmp))
403 reset_vma_resv_huge_pages(tmp); 403 reset_vma_resv_huge_pages(tmp);
404 404
405 /* 405 /*
406 * Link in the new vma and copy the page table entries. 406 * Link in the new vma and copy the page table entries.
407 */ 407 */
408 *pprev = tmp; 408 *pprev = tmp;
409 pprev = &tmp->vm_next; 409 pprev = &tmp->vm_next;
410 tmp->vm_prev = prev; 410 tmp->vm_prev = prev;
411 prev = tmp; 411 prev = tmp;
412 412
413 __vma_link_rb(mm, tmp, rb_link, rb_parent); 413 __vma_link_rb(mm, tmp, rb_link, rb_parent);
414 rb_link = &tmp->vm_rb.rb_right; 414 rb_link = &tmp->vm_rb.rb_right;
415 rb_parent = &tmp->vm_rb; 415 rb_parent = &tmp->vm_rb;
416 416
417 mm->map_count++; 417 mm->map_count++;
418 retval = copy_page_range(mm, oldmm, mpnt); 418 retval = copy_page_range(mm, oldmm, mpnt);
419 419
420 if (tmp->vm_ops && tmp->vm_ops->open) 420 if (tmp->vm_ops && tmp->vm_ops->open)
421 tmp->vm_ops->open(tmp); 421 tmp->vm_ops->open(tmp);
422 422
423 if (retval) 423 if (retval)
424 goto out; 424 goto out;
425 } 425 }
426 /* a new mm has just been created */ 426 /* a new mm has just been created */
427 arch_dup_mmap(oldmm, mm); 427 arch_dup_mmap(oldmm, mm);
428 retval = 0; 428 retval = 0;
429 out: 429 out:
430 up_write(&mm->mmap_sem); 430 up_write(&mm->mmap_sem);
431 flush_tlb_mm(oldmm); 431 flush_tlb_mm(oldmm);
432 up_write(&oldmm->mmap_sem); 432 up_write(&oldmm->mmap_sem);
433 return retval; 433 return retval;
434 fail_nomem_anon_vma_fork: 434 fail_nomem_anon_vma_fork:
435 mpol_put(pol); 435 mpol_put(pol);
436 fail_nomem_policy: 436 fail_nomem_policy:
437 kmem_cache_free(vm_area_cachep, tmp); 437 kmem_cache_free(vm_area_cachep, tmp);
438 fail_nomem: 438 fail_nomem:
439 retval = -ENOMEM; 439 retval = -ENOMEM;
440 vm_unacct_memory(charge); 440 vm_unacct_memory(charge);
441 goto out; 441 goto out;
442 } 442 }
443 443
444 static inline int mm_alloc_pgd(struct mm_struct * mm) 444 static inline int mm_alloc_pgd(struct mm_struct * mm)
445 { 445 {
446 mm->pgd = pgd_alloc(mm); 446 mm->pgd = pgd_alloc(mm);
447 if (unlikely(!mm->pgd)) 447 if (unlikely(!mm->pgd))
448 return -ENOMEM; 448 return -ENOMEM;
449 return 0; 449 return 0;
450 } 450 }
451 451
452 static inline void mm_free_pgd(struct mm_struct * mm) 452 static inline void mm_free_pgd(struct mm_struct * mm)
453 { 453 {
454 pgd_free(mm, mm->pgd); 454 pgd_free(mm, mm->pgd);
455 } 455 }
456 #else 456 #else
457 #define dup_mmap(mm, oldmm) (0) 457 #define dup_mmap(mm, oldmm) (0)
458 #define mm_alloc_pgd(mm) (0) 458 #define mm_alloc_pgd(mm) (0)
459 #define mm_free_pgd(mm) 459 #define mm_free_pgd(mm)
460 #endif /* CONFIG_MMU */ 460 #endif /* CONFIG_MMU */
461 461
462 __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); 462 __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
463 463
464 #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) 464 #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
465 #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) 465 #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
466 466
467 static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; 467 static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
468 468
469 static int __init coredump_filter_setup(char *s) 469 static int __init coredump_filter_setup(char *s)
470 { 470 {
471 default_dump_filter = 471 default_dump_filter =
472 (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & 472 (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
473 MMF_DUMP_FILTER_MASK; 473 MMF_DUMP_FILTER_MASK;
474 return 1; 474 return 1;
475 } 475 }
476 476
477 __setup("coredump_filter=", coredump_filter_setup); 477 __setup("coredump_filter=", coredump_filter_setup);
478 478
479 #include <linux/init_task.h> 479 #include <linux/init_task.h>
480 480
/*
 * Initialise the AIO context lock and list of @mm.
 * Does nothing when CONFIG_AIO is not enabled.
 */
static void mm_init_aio(struct mm_struct *mm)
{
#ifdef CONFIG_AIO
	spin_lock_init(&mm->ioctx_lock);
	INIT_HLIST_HEAD(&mm->ioctx_list);
#endif /* CONFIG_AIO */
}
488 488
489 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 489 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
490 { 490 {
491 atomic_set(&mm->mm_users, 1); 491 atomic_set(&mm->mm_users, 1);
492 atomic_set(&mm->mm_count, 1); 492 atomic_set(&mm->mm_count, 1);
493 init_rwsem(&mm->mmap_sem); 493 init_rwsem(&mm->mmap_sem);
494 INIT_LIST_HEAD(&mm->mmlist); 494 INIT_LIST_HEAD(&mm->mmlist);
495 mm->flags = (current->mm) ? 495 mm->flags = (current->mm) ?
496 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; 496 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
497 mm->core_state = NULL; 497 mm->core_state = NULL;
498 mm->nr_ptes = 0; 498 mm->nr_ptes = 0;
499 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); 499 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
500 spin_lock_init(&mm->page_table_lock); 500 spin_lock_init(&mm->page_table_lock);
501 mm->free_area_cache = TASK_UNMAPPED_BASE; 501 mm->free_area_cache = TASK_UNMAPPED_BASE;
502 mm->cached_hole_size = ~0UL; 502 mm->cached_hole_size = ~0UL;
503 mm_init_aio(mm); 503 mm_init_aio(mm);
504 mm_init_owner(mm, p); 504 mm_init_owner(mm, p);
505 atomic_set(&mm->oom_disable_count, 0); 505 atomic_set(&mm->oom_disable_count, 0);
506 506
507 if (likely(!mm_alloc_pgd(mm))) { 507 if (likely(!mm_alloc_pgd(mm))) {
508 mm->def_flags = 0; 508 mm->def_flags = 0;
509 mmu_notifier_mm_init(mm); 509 mmu_notifier_mm_init(mm);
510 return mm; 510 return mm;
511 } 511 }
512 512
513 free_mm(mm); 513 free_mm(mm);
514 return NULL; 514 return NULL;
515 } 515 }
516 516
517 /* 517 /*
518 * Allocate and initialize an mm_struct. 518 * Allocate and initialize an mm_struct.
519 */ 519 */
520 struct mm_struct * mm_alloc(void) 520 struct mm_struct * mm_alloc(void)
521 { 521 {
522 struct mm_struct * mm; 522 struct mm_struct * mm;
523 523
524 mm = allocate_mm(); 524 mm = allocate_mm();
525 if (mm) { 525 if (mm) {
526 memset(mm, 0, sizeof(*mm)); 526 memset(mm, 0, sizeof(*mm));
527 mm = mm_init(mm, current); 527 mm = mm_init(mm, current);
528 } 528 }
529 return mm; 529 return mm;
530 } 530 }
531 531
532 /* 532 /*
533 * Called when the last reference to the mm 533 * Called when the last reference to the mm
534 * is dropped: either by a lazy thread or by 534 * is dropped: either by a lazy thread or by
535 * mmput. Free the page directory and the mm. 535 * mmput. Free the page directory and the mm.
536 */ 536 */
537 void __mmdrop(struct mm_struct *mm) 537 void __mmdrop(struct mm_struct *mm)
538 { 538 {
539 BUG_ON(mm == &init_mm); 539 BUG_ON(mm == &init_mm);
540 mm_free_pgd(mm); 540 mm_free_pgd(mm);
541 destroy_context(mm); 541 destroy_context(mm);
542 mmu_notifier_mm_destroy(mm); 542 mmu_notifier_mm_destroy(mm);
543 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 543 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
544 VM_BUG_ON(mm->pmd_huge_pte); 544 VM_BUG_ON(mm->pmd_huge_pte);
545 #endif 545 #endif
546 free_mm(mm); 546 free_mm(mm);
547 } 547 }
548 EXPORT_SYMBOL_GPL(__mmdrop); 548 EXPORT_SYMBOL_GPL(__mmdrop);
549 549
550 /* 550 /*
551 * Decrement the use count and release all resources for an mm. 551 * Decrement the use count and release all resources for an mm.
552 */ 552 */
553 void mmput(struct mm_struct *mm) 553 void mmput(struct mm_struct *mm)
554 { 554 {
555 might_sleep(); 555 might_sleep();
556 556
557 if (atomic_dec_and_test(&mm->mm_users)) { 557 if (atomic_dec_and_test(&mm->mm_users)) {
558 exit_aio(mm); 558 exit_aio(mm);
559 ksm_exit(mm); 559 ksm_exit(mm);
560 khugepaged_exit(mm); /* must run before exit_mmap */ 560 khugepaged_exit(mm); /* must run before exit_mmap */
561 exit_mmap(mm); 561 exit_mmap(mm);
562 set_mm_exe_file(mm, NULL); 562 set_mm_exe_file(mm, NULL);
563 if (!list_empty(&mm->mmlist)) { 563 if (!list_empty(&mm->mmlist)) {
564 spin_lock(&mmlist_lock); 564 spin_lock(&mmlist_lock);
565 list_del(&mm->mmlist); 565 list_del(&mm->mmlist);
566 spin_unlock(&mmlist_lock); 566 spin_unlock(&mmlist_lock);
567 } 567 }
568 put_swap_token(mm); 568 put_swap_token(mm);
569 if (mm->binfmt) 569 if (mm->binfmt)
570 module_put(mm->binfmt->module); 570 module_put(mm->binfmt->module);
571 mmdrop(mm); 571 mmdrop(mm);
572 } 572 }
573 } 573 }
574 EXPORT_SYMBOL_GPL(mmput); 574 EXPORT_SYMBOL_GPL(mmput);
575 575
576 /** 576 /**
577 * get_task_mm - acquire a reference to the task's mm 577 * get_task_mm - acquire a reference to the task's mm
578 * 578 *
579 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning 579 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
580 * this kernel workthread has transiently adopted a user mm with use_mm, 580 * this kernel workthread has transiently adopted a user mm with use_mm,
581 * to do its AIO) is not set and if so returns a reference to it, after 581 * to do its AIO) is not set and if so returns a reference to it, after
582 * bumping up the use count. User must release the mm via mmput() 582 * bumping up the use count. User must release the mm via mmput()
583 * after use. Typically used by /proc and ptrace. 583 * after use. Typically used by /proc and ptrace.
584 */ 584 */
585 struct mm_struct *get_task_mm(struct task_struct *task) 585 struct mm_struct *get_task_mm(struct task_struct *task)
586 { 586 {
587 struct mm_struct *mm; 587 struct mm_struct *mm;
588 588
589 task_lock(task); 589 task_lock(task);
590 mm = task->mm; 590 mm = task->mm;
591 if (mm) { 591 if (mm) {
592 if (task->flags & PF_KTHREAD) 592 if (task->flags & PF_KTHREAD)
593 mm = NULL; 593 mm = NULL;
594 else 594 else
595 atomic_inc(&mm->mm_users); 595 atomic_inc(&mm->mm_users);
596 } 596 }
597 task_unlock(task); 597 task_unlock(task);
598 return mm; 598 return mm;
599 } 599 }
600 EXPORT_SYMBOL_GPL(get_task_mm); 600 EXPORT_SYMBOL_GPL(get_task_mm);
601 601
602 /* Please note the differences between mmput and mm_release. 602 /* Please note the differences between mmput and mm_release.
603 * mmput is called whenever we stop holding onto a mm_struct, 603 * mmput is called whenever we stop holding onto a mm_struct,
604 * error success whatever. 604 * error success whatever.
605 * 605 *
606 * mm_release is called after a mm_struct has been removed 606 * mm_release is called after a mm_struct has been removed
607 * from the current process. 607 * from the current process.
608 * 608 *
609 * This difference is important for error handling, when we 609 * This difference is important for error handling, when we
610 * only half set up a mm_struct for a new process and need to restore 610 * only half set up a mm_struct for a new process and need to restore
611 * the old one. Because we mmput the new mm_struct before 611 * the old one. Because we mmput the new mm_struct before
612 * restoring the old one. . . 612 * restoring the old one. . .
613 * Eric Biederman 10 January 1998 613 * Eric Biederman 10 January 1998
614 */ 614 */
615 void mm_release(struct task_struct *tsk, struct mm_struct *mm) 615 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
616 { 616 {
617 struct completion *vfork_done = tsk->vfork_done; 617 struct completion *vfork_done = tsk->vfork_done;
618 618
619 /* Get rid of any futexes when releasing the mm */ 619 /* Get rid of any futexes when releasing the mm */
620 #ifdef CONFIG_FUTEX 620 #ifdef CONFIG_FUTEX
621 if (unlikely(tsk->robust_list)) { 621 if (unlikely(tsk->robust_list)) {
622 exit_robust_list(tsk); 622 exit_robust_list(tsk);
623 tsk->robust_list = NULL; 623 tsk->robust_list = NULL;
624 } 624 }
625 #ifdef CONFIG_COMPAT 625 #ifdef CONFIG_COMPAT
626 if (unlikely(tsk->compat_robust_list)) { 626 if (unlikely(tsk->compat_robust_list)) {
627 compat_exit_robust_list(tsk); 627 compat_exit_robust_list(tsk);
628 tsk->compat_robust_list = NULL; 628 tsk->compat_robust_list = NULL;
629 } 629 }
630 #endif 630 #endif
631 if (unlikely(!list_empty(&tsk->pi_state_list))) 631 if (unlikely(!list_empty(&tsk->pi_state_list)))
632 exit_pi_state_list(tsk); 632 exit_pi_state_list(tsk);
633 #endif 633 #endif
634 634
635 /* Get rid of any cached register state */ 635 /* Get rid of any cached register state */
636 deactivate_mm(tsk, mm); 636 deactivate_mm(tsk, mm);
637 637
638 /* notify parent sleeping on vfork() */ 638 /* notify parent sleeping on vfork() */
639 if (vfork_done) { 639 if (vfork_done) {
640 tsk->vfork_done = NULL; 640 tsk->vfork_done = NULL;
641 complete(vfork_done); 641 complete(vfork_done);
642 } 642 }
643 643
644 /* 644 /*
645 * If we're exiting normally, clear a user-space tid field if 645 * If we're exiting normally, clear a user-space tid field if
646 * requested. We leave this alone when dying by signal, to leave 646 * requested. We leave this alone when dying by signal, to leave
647 * the value intact in a core dump, and to save the unnecessary 647 * the value intact in a core dump, and to save the unnecessary
648 * trouble otherwise. Userland only wants this done for a sys_exit. 648 * trouble otherwise. Userland only wants this done for a sys_exit.
649 */ 649 */
650 if (tsk->clear_child_tid) { 650 if (tsk->clear_child_tid) {
651 if (!(tsk->flags & PF_SIGNALED) && 651 if (!(tsk->flags & PF_SIGNALED) &&
652 atomic_read(&mm->mm_users) > 1) { 652 atomic_read(&mm->mm_users) > 1) {
653 /* 653 /*
654 * We don't check the error code - if userspace has 654 * We don't check the error code - if userspace has
655 * not set up a proper pointer then tough luck. 655 * not set up a proper pointer then tough luck.
656 */ 656 */
657 put_user(0, tsk->clear_child_tid); 657 put_user(0, tsk->clear_child_tid);
658 sys_futex(tsk->clear_child_tid, FUTEX_WAKE, 658 sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
659 1, NULL, NULL, 0); 659 1, NULL, NULL, 0);
660 } 660 }
661 tsk->clear_child_tid = NULL; 661 tsk->clear_child_tid = NULL;
662 } 662 }
663 } 663 }
664 664
665 /* 665 /*
666 * Allocate a new mm structure and copy contents from the 666 * Allocate a new mm structure and copy contents from the
667 * mm structure of the passed in task structure. 667 * mm structure of the passed in task structure.
668 */ 668 */
669 struct mm_struct *dup_mm(struct task_struct *tsk) 669 struct mm_struct *dup_mm(struct task_struct *tsk)
670 { 670 {
671 struct mm_struct *mm, *oldmm = current->mm; 671 struct mm_struct *mm, *oldmm = current->mm;
672 int err; 672 int err;
673 673
674 if (!oldmm) 674 if (!oldmm)
675 return NULL; 675 return NULL;
676 676
677 mm = allocate_mm(); 677 mm = allocate_mm();
678 if (!mm) 678 if (!mm)
679 goto fail_nomem; 679 goto fail_nomem;
680 680
681 memcpy(mm, oldmm, sizeof(*mm)); 681 memcpy(mm, oldmm, sizeof(*mm));
682 682
683 /* Initializing for Swap token stuff */ 683 /* Initializing for Swap token stuff */
684 mm->token_priority = 0; 684 mm->token_priority = 0;
685 mm->last_interval = 0; 685 mm->last_interval = 0;
686 686
687 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 687 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
688 mm->pmd_huge_pte = NULL; 688 mm->pmd_huge_pte = NULL;
689 #endif 689 #endif
690 690
691 if (!mm_init(mm, tsk)) 691 if (!mm_init(mm, tsk))
692 goto fail_nomem; 692 goto fail_nomem;
693 693
694 if (init_new_context(tsk, mm)) 694 if (init_new_context(tsk, mm))
695 goto fail_nocontext; 695 goto fail_nocontext;
696 696
697 dup_mm_exe_file(oldmm, mm); 697 dup_mm_exe_file(oldmm, mm);
698 698
699 err = dup_mmap(mm, oldmm); 699 err = dup_mmap(mm, oldmm);
700 if (err) 700 if (err)
701 goto free_pt; 701 goto free_pt;
702 702
703 mm->hiwater_rss = get_mm_rss(mm); 703 mm->hiwater_rss = get_mm_rss(mm);
704 mm->hiwater_vm = mm->total_vm; 704 mm->hiwater_vm = mm->total_vm;
705 705
706 if (mm->binfmt && !try_module_get(mm->binfmt->module)) 706 if (mm->binfmt && !try_module_get(mm->binfmt->module))
707 goto free_pt; 707 goto free_pt;
708 708
709 return mm; 709 return mm;
710 710
711 free_pt: 711 free_pt:
712 /* don't put binfmt in mmput, we haven't got module yet */ 712 /* don't put binfmt in mmput, we haven't got module yet */
713 mm->binfmt = NULL; 713 mm->binfmt = NULL;
714 mmput(mm); 714 mmput(mm);
715 715
716 fail_nomem: 716 fail_nomem:
717 return NULL; 717 return NULL;
718 718
719 fail_nocontext: 719 fail_nocontext:
720 /* 720 /*
721 * If init_new_context() failed, we cannot use mmput() to free the mm 721 * If init_new_context() failed, we cannot use mmput() to free the mm
722 * because it calls destroy_context() 722 * because it calls destroy_context()
723 */ 723 */
724 mm_free_pgd(mm); 724 mm_free_pgd(mm);
725 free_mm(mm); 725 free_mm(mm);
726 return NULL; 726 return NULL;
727 } 727 }
728 728
729 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 729 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
730 { 730 {
731 struct mm_struct * mm, *oldmm; 731 struct mm_struct * mm, *oldmm;
732 int retval; 732 int retval;
733 733
734 tsk->min_flt = tsk->maj_flt = 0; 734 tsk->min_flt = tsk->maj_flt = 0;
735 tsk->nvcsw = tsk->nivcsw = 0; 735 tsk->nvcsw = tsk->nivcsw = 0;
736 #ifdef CONFIG_DETECT_HUNG_TASK 736 #ifdef CONFIG_DETECT_HUNG_TASK
737 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; 737 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
738 #endif 738 #endif
739 739
740 tsk->mm = NULL; 740 tsk->mm = NULL;
741 tsk->active_mm = NULL; 741 tsk->active_mm = NULL;
742 742
743 /* 743 /*
744 * Are we cloning a kernel thread? 744 * Are we cloning a kernel thread?
745 * 745 *
746 * We need to steal a active VM for that.. 746 * We need to steal a active VM for that..
747 */ 747 */
748 oldmm = current->mm; 748 oldmm = current->mm;
749 if (!oldmm) 749 if (!oldmm)
750 return 0; 750 return 0;
751 751
752 if (clone_flags & CLONE_VM) { 752 if (clone_flags & CLONE_VM) {
753 atomic_inc(&oldmm->mm_users); 753 atomic_inc(&oldmm->mm_users);
754 mm = oldmm; 754 mm = oldmm;
755 goto good_mm; 755 goto good_mm;
756 } 756 }
757 757
758 retval = -ENOMEM; 758 retval = -ENOMEM;
759 mm = dup_mm(tsk); 759 mm = dup_mm(tsk);
760 if (!mm) 760 if (!mm)
761 goto fail_nomem; 761 goto fail_nomem;
762 762
763 good_mm: 763 good_mm:
764 /* Initializing for Swap token stuff */ 764 /* Initializing for Swap token stuff */
765 mm->token_priority = 0; 765 mm->token_priority = 0;
766 mm->last_interval = 0; 766 mm->last_interval = 0;
767 if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) 767 if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
768 atomic_inc(&mm->oom_disable_count); 768 atomic_inc(&mm->oom_disable_count);
769 769
770 tsk->mm = mm; 770 tsk->mm = mm;
771 tsk->active_mm = mm; 771 tsk->active_mm = mm;
772 return 0; 772 return 0;
773 773
774 fail_nomem: 774 fail_nomem:
775 return retval; 775 return retval;
776 } 776 }
777 777
778 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) 778 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
779 { 779 {
780 struct fs_struct *fs = current->fs; 780 struct fs_struct *fs = current->fs;
781 if (clone_flags & CLONE_FS) { 781 if (clone_flags & CLONE_FS) {
782 /* tsk->fs is already what we want */ 782 /* tsk->fs is already what we want */
783 spin_lock(&fs->lock); 783 spin_lock(&fs->lock);
784 if (fs->in_exec) { 784 if (fs->in_exec) {
785 spin_unlock(&fs->lock); 785 spin_unlock(&fs->lock);
786 return -EAGAIN; 786 return -EAGAIN;
787 } 787 }
788 fs->users++; 788 fs->users++;
789 spin_unlock(&fs->lock); 789 spin_unlock(&fs->lock);
790 return 0; 790 return 0;
791 } 791 }
792 tsk->fs = copy_fs_struct(fs); 792 tsk->fs = copy_fs_struct(fs);
793 if (!tsk->fs) 793 if (!tsk->fs)
794 return -ENOMEM; 794 return -ENOMEM;
795 return 0; 795 return 0;
796 } 796 }
797 797
798 static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 798 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
799 { 799 {
800 struct files_struct *oldf, *newf; 800 struct files_struct *oldf, *newf;
801 int error = 0; 801 int error = 0;
802 802
803 /* 803 /*
804 * A background process may not have any files ... 804 * A background process may not have any files ...
805 */ 805 */
806 oldf = current->files; 806 oldf = current->files;
807 if (!oldf) 807 if (!oldf)
808 goto out; 808 goto out;
809 809
810 if (clone_flags & CLONE_FILES) { 810 if (clone_flags & CLONE_FILES) {
811 atomic_inc(&oldf->count); 811 atomic_inc(&oldf->count);
812 goto out; 812 goto out;
813 } 813 }
814 814
815 newf = dup_fd(oldf, &error); 815 newf = dup_fd(oldf, &error);
816 if (!newf) 816 if (!newf)
817 goto out; 817 goto out;
818 818
819 tsk->files = newf; 819 tsk->files = newf;
820 error = 0; 820 error = 0;
821 out: 821 out:
822 return error; 822 return error;
823 } 823 }
824 824
/*
 * Set up the io_context of the new task @tsk (CONFIG_BLOCK only).
 * Returns 0 on success, -ENOMEM when a context cannot be obtained.
 */
static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
#ifdef CONFIG_BLOCK
	struct io_context *parent_ioc = current->io_context;

	if (!parent_ioc)
		return 0;

	/* Share io context with parent, if CLONE_IO is set */
	if (clone_flags & CLONE_IO) {
		tsk->io_context = ioc_task_link(parent_ioc);
		if (unlikely(!tsk->io_context))
			return -ENOMEM;
		return 0;
	}

	/* Not sharing: a context is only needed to carry a valid ioprio. */
	if (!ioprio_valid(parent_ioc->ioprio))
		return 0;

	tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
	if (unlikely(!tsk->io_context))
		return -ENOMEM;

	tsk->io_context->ioprio = parent_ioc->ioprio;
#endif
	return 0;
}
849 849
850 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) 850 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
851 { 851 {
852 struct sighand_struct *sig; 852 struct sighand_struct *sig;
853 853
854 if (clone_flags & CLONE_SIGHAND) { 854 if (clone_flags & CLONE_SIGHAND) {
855 atomic_inc(&current->sighand->count); 855 atomic_inc(&current->sighand->count);
856 return 0; 856 return 0;
857 } 857 }
858 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 858 sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
859 rcu_assign_pointer(tsk->sighand, sig); 859 rcu_assign_pointer(tsk->sighand, sig);
860 if (!sig) 860 if (!sig)
861 return -ENOMEM; 861 return -ENOMEM;
862 atomic_set(&sig->count, 1); 862 atomic_set(&sig->count, 1);
863 memcpy(sig->action, current->sighand->action, sizeof(sig->action)); 863 memcpy(sig->action, current->sighand->action, sizeof(sig->action));
864 return 0; 864 return 0;
865 } 865 }
866 866
867 void __cleanup_sighand(struct sighand_struct *sighand) 867 void __cleanup_sighand(struct sighand_struct *sighand)
868 { 868 {
869 if (atomic_dec_and_test(&sighand->count)) 869 if (atomic_dec_and_test(&sighand->count))
870 kmem_cache_free(sighand_cachep, sighand); 870 kmem_cache_free(sighand_cachep, sighand);
871 } 871 }
872 872
873 873
874 /* 874 /*
875 * Initialize POSIX timer handling for a thread group. 875 * Initialize POSIX timer handling for a thread group.
876 */ 876 */
877 static void posix_cpu_timers_init_group(struct signal_struct *sig) 877 static void posix_cpu_timers_init_group(struct signal_struct *sig)
878 { 878 {
879 unsigned long cpu_limit; 879 unsigned long cpu_limit;
880 880
881 /* Thread group counters. */ 881 /* Thread group counters. */
882 thread_group_cputime_init(sig); 882 thread_group_cputime_init(sig);
883 883
884 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); 884 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
885 if (cpu_limit != RLIM_INFINITY) { 885 if (cpu_limit != RLIM_INFINITY) {
886 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); 886 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
887 sig->cputimer.running = 1; 887 sig->cputimer.running = 1;
888 } 888 }
889 889
890 /* The timer lists. */ 890 /* The timer lists. */
891 INIT_LIST_HEAD(&sig->cpu_timers[0]); 891 INIT_LIST_HEAD(&sig->cpu_timers[0]);
892 INIT_LIST_HEAD(&sig->cpu_timers[1]); 892 INIT_LIST_HEAD(&sig->cpu_timers[1]);
893 INIT_LIST_HEAD(&sig->cpu_timers[2]); 893 INIT_LIST_HEAD(&sig->cpu_timers[2]);
894 } 894 }
895 895
896 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) 896 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
897 { 897 {
898 struct signal_struct *sig; 898 struct signal_struct *sig;
899 899
900 if (clone_flags & CLONE_THREAD) 900 if (clone_flags & CLONE_THREAD)
901 return 0; 901 return 0;
902 902
903 sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL); 903 sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
904 tsk->signal = sig; 904 tsk->signal = sig;
905 if (!sig) 905 if (!sig)
906 return -ENOMEM; 906 return -ENOMEM;
907 907
908 sig->nr_threads = 1; 908 sig->nr_threads = 1;
909 atomic_set(&sig->live, 1); 909 atomic_set(&sig->live, 1);
910 atomic_set(&sig->sigcnt, 1); 910 atomic_set(&sig->sigcnt, 1);
911 init_waitqueue_head(&sig->wait_chldexit); 911 init_waitqueue_head(&sig->wait_chldexit);
912 if (clone_flags & CLONE_NEWPID) 912 if (clone_flags & CLONE_NEWPID)
913 sig->flags |= SIGNAL_UNKILLABLE; 913 sig->flags |= SIGNAL_UNKILLABLE;
914 sig->curr_target = tsk; 914 sig->curr_target = tsk;
915 init_sigpending(&sig->shared_pending); 915 init_sigpending(&sig->shared_pending);
916 INIT_LIST_HEAD(&sig->posix_timers); 916 INIT_LIST_HEAD(&sig->posix_timers);
917 917
918 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 918 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
919 sig->real_timer.function = it_real_fn; 919 sig->real_timer.function = it_real_fn;
920 920
921 task_lock(current->group_leader); 921 task_lock(current->group_leader);
922 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 922 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
923 task_unlock(current->group_leader); 923 task_unlock(current->group_leader);
924 924
925 posix_cpu_timers_init_group(sig); 925 posix_cpu_timers_init_group(sig);
926 926
927 tty_audit_fork(sig); 927 tty_audit_fork(sig);
928 sched_autogroup_fork(sig); 928 sched_autogroup_fork(sig);
929 929
930 sig->oom_adj = current->signal->oom_adj; 930 sig->oom_adj = current->signal->oom_adj;
931 sig->oom_score_adj = current->signal->oom_score_adj; 931 sig->oom_score_adj = current->signal->oom_score_adj;
932 sig->oom_score_adj_min = current->signal->oom_score_adj_min; 932 sig->oom_score_adj_min = current->signal->oom_score_adj_min;
933 933
934 mutex_init(&sig->cred_guard_mutex); 934 mutex_init(&sig->cred_guard_mutex);
935 935
936 return 0; 936 return 0;
937 } 937 }
938 938
939 static void copy_flags(unsigned long clone_flags, struct task_struct *p) 939 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
940 { 940 {
941 unsigned long new_flags = p->flags; 941 unsigned long new_flags = p->flags;
942 942
943 new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); 943 new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
944 new_flags |= PF_FORKNOEXEC; 944 new_flags |= PF_FORKNOEXEC;
945 new_flags |= PF_STARTING; 945 new_flags |= PF_STARTING;
946 p->flags = new_flags; 946 p->flags = new_flags;
947 clear_freeze_flag(p); 947 clear_freeze_flag(p);
948 } 948 }
949 949
950 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) 950 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
951 { 951 {
952 current->clear_child_tid = tidptr; 952 current->clear_child_tid = tidptr;
953 953
954 return task_pid_vnr(current); 954 return task_pid_vnr(current);
955 } 955 }
956 956
957 static void rt_mutex_init_task(struct task_struct *p) 957 static void rt_mutex_init_task(struct task_struct *p)
958 { 958 {
959 raw_spin_lock_init(&p->pi_lock); 959 raw_spin_lock_init(&p->pi_lock);
960 #ifdef CONFIG_RT_MUTEXES 960 #ifdef CONFIG_RT_MUTEXES
961 plist_head_init_raw(&p->pi_waiters, &p->pi_lock); 961 plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
962 p->pi_blocked_on = NULL; 962 p->pi_blocked_on = NULL;
963 #endif 963 #endif
964 } 964 }
965 965
#ifdef CONFIG_MM_OWNER
/* Record task @p as the owner of @mm. */
void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
	mm->owner = p;
}
#endif /* CONFIG_MM_OWNER */
972 972
973 /* 973 /*
974 * Initialize POSIX timer handling for a single task. 974 * Initialize POSIX timer handling for a single task.
975 */ 975 */
976 static void posix_cpu_timers_init(struct task_struct *tsk) 976 static void posix_cpu_timers_init(struct task_struct *tsk)
977 { 977 {
978 tsk->cputime_expires.prof_exp = cputime_zero; 978 tsk->cputime_expires.prof_exp = cputime_zero;
979 tsk->cputime_expires.virt_exp = cputime_zero; 979 tsk->cputime_expires.virt_exp = cputime_zero;
980 tsk->cputime_expires.sched_exp = 0; 980 tsk->cputime_expires.sched_exp = 0;
981 INIT_LIST_HEAD(&tsk->cpu_timers[0]); 981 INIT_LIST_HEAD(&tsk->cpu_timers[0]);
982 INIT_LIST_HEAD(&tsk->cpu_timers[1]); 982 INIT_LIST_HEAD(&tsk->cpu_timers[1]);
983 INIT_LIST_HEAD(&tsk->cpu_timers[2]); 983 INIT_LIST_HEAD(&tsk->cpu_timers[2]);
984 } 984 }
985 985
986 /* 986 /*
987 * This creates a new process as a copy of the old one, 987 * This creates a new process as a copy of the old one,
988 * but does not actually start it yet. 988 * but does not actually start it yet.
989 * 989 *
990 * It copies the registers, and all the appropriate 990 * It copies the registers, and all the appropriate
991 * parts of the process environment (as per the clone 991 * parts of the process environment (as per the clone
992 * flags). The actual kick-off is left to the caller. 992 * flags). The actual kick-off is left to the caller.
993 */ 993 */
994 static struct task_struct *copy_process(unsigned long clone_flags, 994 static struct task_struct *copy_process(unsigned long clone_flags,
995 unsigned long stack_start, 995 unsigned long stack_start,
996 struct pt_regs *regs, 996 struct pt_regs *regs,
997 unsigned long stack_size, 997 unsigned long stack_size,
998 int __user *child_tidptr, 998 int __user *child_tidptr,
999 struct pid *pid, 999 struct pid *pid,
1000 int trace) 1000 int trace)
1001 { 1001 {
1002 int retval; 1002 int retval;
1003 struct task_struct *p; 1003 struct task_struct *p;
1004 int cgroup_callbacks_done = 0; 1004 int cgroup_callbacks_done = 0;
1005 1005
1006 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 1006 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
1007 return ERR_PTR(-EINVAL); 1007 return ERR_PTR(-EINVAL);
1008 1008
1009 /* 1009 /*
1010 * Thread groups must share signals as well, and detached threads 1010 * Thread groups must share signals as well, and detached threads
1011 * can only be started up within the thread group. 1011 * can only be started up within the thread group.
1012 */ 1012 */
1013 if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) 1013 if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
1014 return ERR_PTR(-EINVAL); 1014 return ERR_PTR(-EINVAL);
1015 1015
1016 /* 1016 /*
1017 * Shared signal handlers imply shared VM. By way of the above, 1017 * Shared signal handlers imply shared VM. By way of the above,
1018 * thread groups also imply shared VM. Blocking this case allows 1018 * thread groups also imply shared VM. Blocking this case allows
1019 * for various simplifications in other code. 1019 * for various simplifications in other code.
1020 */ 1020 */
1021 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 1021 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
1022 return ERR_PTR(-EINVAL); 1022 return ERR_PTR(-EINVAL);
1023 1023
1024 /* 1024 /*
1025 * Siblings of global init remain as zombies on exit since they are 1025 * Siblings of global init remain as zombies on exit since they are
1026 * not reaped by their parent (swapper). To solve this and to avoid 1026 * not reaped by their parent (swapper). To solve this and to avoid
1027 * multi-rooted process trees, prevent global and container-inits 1027 * multi-rooted process trees, prevent global and container-inits
1028 * from creating siblings. 1028 * from creating siblings.
1029 */ 1029 */
1030 if ((clone_flags & CLONE_PARENT) && 1030 if ((clone_flags & CLONE_PARENT) &&
1031 current->signal->flags & SIGNAL_UNKILLABLE) 1031 current->signal->flags & SIGNAL_UNKILLABLE)
1032 return ERR_PTR(-EINVAL); 1032 return ERR_PTR(-EINVAL);
1033 1033
1034 retval = security_task_create(clone_flags); 1034 retval = security_task_create(clone_flags);
1035 if (retval) 1035 if (retval)
1036 goto fork_out; 1036 goto fork_out;
1037 1037
1038 retval = -ENOMEM; 1038 retval = -ENOMEM;
1039 p = dup_task_struct(current); 1039 p = dup_task_struct(current);
1040 if (!p) 1040 if (!p)
1041 goto fork_out; 1041 goto fork_out;
1042 1042
1043 ftrace_graph_init_task(p); 1043 ftrace_graph_init_task(p);
1044 1044
1045 rt_mutex_init_task(p); 1045 rt_mutex_init_task(p);
1046 1046
1047 #ifdef CONFIG_PROVE_LOCKING 1047 #ifdef CONFIG_PROVE_LOCKING
1048 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 1048 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
1049 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 1049 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
1050 #endif 1050 #endif
1051 retval = -EAGAIN; 1051 retval = -EAGAIN;
1052 if (atomic_read(&p->real_cred->user->processes) >= 1052 if (atomic_read(&p->real_cred->user->processes) >=
1053 task_rlimit(p, RLIMIT_NPROC)) { 1053 task_rlimit(p, RLIMIT_NPROC)) {
1054 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 1054 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
1055 p->real_cred->user != INIT_USER) 1055 p->real_cred->user != INIT_USER)
1056 goto bad_fork_free; 1056 goto bad_fork_free;
1057 } 1057 }
1058 1058
1059 retval = copy_creds(p, clone_flags); 1059 retval = copy_creds(p, clone_flags);
1060 if (retval < 0) 1060 if (retval < 0)
1061 goto bad_fork_free; 1061 goto bad_fork_free;
1062 1062
1063 /* 1063 /*
1064 * If multiple threads are within copy_process(), then this check 1064 * If multiple threads are within copy_process(), then this check
1065 * triggers too late. This doesn't hurt, the check is only there 1065 * triggers too late. This doesn't hurt, the check is only there
1066 * to stop root fork bombs. 1066 * to stop root fork bombs.
1067 */ 1067 */
1068 retval = -EAGAIN; 1068 retval = -EAGAIN;
1069 if (nr_threads >= max_threads) 1069 if (nr_threads >= max_threads)
1070 goto bad_fork_cleanup_count; 1070 goto bad_fork_cleanup_count;
1071 1071
1072 if (!try_module_get(task_thread_info(p)->exec_domain->module)) 1072 if (!try_module_get(task_thread_info(p)->exec_domain->module))
1073 goto bad_fork_cleanup_count; 1073 goto bad_fork_cleanup_count;
1074 1074
1075 p->did_exec = 0; 1075 p->did_exec = 0;
1076 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1076 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1077 copy_flags(clone_flags, p); 1077 copy_flags(clone_flags, p);
1078 INIT_LIST_HEAD(&p->children); 1078 INIT_LIST_HEAD(&p->children);
1079 INIT_LIST_HEAD(&p->sibling); 1079 INIT_LIST_HEAD(&p->sibling);
1080 rcu_copy_process(p); 1080 rcu_copy_process(p);
1081 p->vfork_done = NULL; 1081 p->vfork_done = NULL;
1082 spin_lock_init(&p->alloc_lock); 1082 spin_lock_init(&p->alloc_lock);
1083 1083
1084 init_sigpending(&p->pending); 1084 init_sigpending(&p->pending);
1085 1085
1086 p->utime = cputime_zero; 1086 p->utime = cputime_zero;
1087 p->stime = cputime_zero; 1087 p->stime = cputime_zero;
1088 p->gtime = cputime_zero; 1088 p->gtime = cputime_zero;
1089 p->utimescaled = cputime_zero; 1089 p->utimescaled = cputime_zero;
1090 p->stimescaled = cputime_zero; 1090 p->stimescaled = cputime_zero;
1091 #ifndef CONFIG_VIRT_CPU_ACCOUNTING 1091 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
1092 p->prev_utime = cputime_zero; 1092 p->prev_utime = cputime_zero;
1093 p->prev_stime = cputime_zero; 1093 p->prev_stime = cputime_zero;
1094 #endif 1094 #endif
1095 #if defined(SPLIT_RSS_COUNTING) 1095 #if defined(SPLIT_RSS_COUNTING)
1096 memset(&p->rss_stat, 0, sizeof(p->rss_stat)); 1096 memset(&p->rss_stat, 0, sizeof(p->rss_stat));
1097 #endif 1097 #endif
1098 1098
1099 p->default_timer_slack_ns = current->timer_slack_ns; 1099 p->default_timer_slack_ns = current->timer_slack_ns;
1100 1100
1101 task_io_accounting_init(&p->ioac); 1101 task_io_accounting_init(&p->ioac);
1102 acct_clear_integrals(p); 1102 acct_clear_integrals(p);
1103 1103
1104 posix_cpu_timers_init(p); 1104 posix_cpu_timers_init(p);
1105 1105
1106 p->lock_depth = -1; /* -1 = no lock */ 1106 p->lock_depth = -1; /* -1 = no lock */
1107 do_posix_clock_monotonic_gettime(&p->start_time); 1107 do_posix_clock_monotonic_gettime(&p->start_time);
1108 p->real_start_time = p->start_time; 1108 p->real_start_time = p->start_time;
1109 monotonic_to_bootbased(&p->real_start_time); 1109 monotonic_to_bootbased(&p->real_start_time);
1110 p->io_context = NULL; 1110 p->io_context = NULL;
1111 p->audit_context = NULL; 1111 p->audit_context = NULL;
1112 cgroup_fork(p); 1112 cgroup_fork(p);
1113 #ifdef CONFIG_NUMA 1113 #ifdef CONFIG_NUMA
1114 p->mempolicy = mpol_dup(p->mempolicy); 1114 p->mempolicy = mpol_dup(p->mempolicy);
1115 if (IS_ERR(p->mempolicy)) { 1115 if (IS_ERR(p->mempolicy)) {
1116 retval = PTR_ERR(p->mempolicy); 1116 retval = PTR_ERR(p->mempolicy);
1117 p->mempolicy = NULL; 1117 p->mempolicy = NULL;
1118 goto bad_fork_cleanup_cgroup; 1118 goto bad_fork_cleanup_cgroup;
1119 } 1119 }
1120 mpol_fix_fork_child_flag(p); 1120 mpol_fix_fork_child_flag(p);
1121 #endif 1121 #endif
1122 #ifdef CONFIG_TRACE_IRQFLAGS 1122 #ifdef CONFIG_TRACE_IRQFLAGS
1123 p->irq_events = 0; 1123 p->irq_events = 0;
1124 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 1124 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
1125 p->hardirqs_enabled = 1; 1125 p->hardirqs_enabled = 1;
1126 #else 1126 #else
1127 p->hardirqs_enabled = 0; 1127 p->hardirqs_enabled = 0;
1128 #endif 1128 #endif
1129 p->hardirq_enable_ip = 0; 1129 p->hardirq_enable_ip = 0;
1130 p->hardirq_enable_event = 0; 1130 p->hardirq_enable_event = 0;
1131 p->hardirq_disable_ip = _THIS_IP_; 1131 p->hardirq_disable_ip = _THIS_IP_;
1132 p->hardirq_disable_event = 0; 1132 p->hardirq_disable_event = 0;
1133 p->softirqs_enabled = 1; 1133 p->softirqs_enabled = 1;
1134 p->softirq_enable_ip = _THIS_IP_; 1134 p->softirq_enable_ip = _THIS_IP_;
1135 p->softirq_enable_event = 0; 1135 p->softirq_enable_event = 0;
1136 p->softirq_disable_ip = 0; 1136 p->softirq_disable_ip = 0;
1137 p->softirq_disable_event = 0; 1137 p->softirq_disable_event = 0;
1138 p->hardirq_context = 0; 1138 p->hardirq_context = 0;
1139 p->softirq_context = 0; 1139 p->softirq_context = 0;
1140 #endif 1140 #endif
1141 #ifdef CONFIG_LOCKDEP 1141 #ifdef CONFIG_LOCKDEP
1142 p->lockdep_depth = 0; /* no locks held yet */ 1142 p->lockdep_depth = 0; /* no locks held yet */
1143 p->curr_chain_key = 0; 1143 p->curr_chain_key = 0;
1144 p->lockdep_recursion = 0; 1144 p->lockdep_recursion = 0;
1145 #endif 1145 #endif
1146 1146
1147 #ifdef CONFIG_DEBUG_MUTEXES 1147 #ifdef CONFIG_DEBUG_MUTEXES
1148 p->blocked_on = NULL; /* not blocked yet */ 1148 p->blocked_on = NULL; /* not blocked yet */
1149 #endif 1149 #endif
1150 #ifdef CONFIG_CGROUP_MEM_RES_CTLR 1150 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
1151 p->memcg_batch.do_batch = 0; 1151 p->memcg_batch.do_batch = 0;
1152 p->memcg_batch.memcg = NULL; 1152 p->memcg_batch.memcg = NULL;
1153 #endif 1153 #endif
1154 1154
1155 /* Perform scheduler related setup. Assign this task to a CPU. */ 1155 /* Perform scheduler related setup. Assign this task to a CPU. */
1156 sched_fork(p, clone_flags); 1156 sched_fork(p, clone_flags);
1157 1157
1158 retval = perf_event_init_task(p); 1158 retval = perf_event_init_task(p);
1159 if (retval) 1159 if (retval)
1160 goto bad_fork_cleanup_policy; 1160 goto bad_fork_cleanup_policy;
1161 1161
1162 if ((retval = audit_alloc(p))) 1162 if ((retval = audit_alloc(p)))
1163 goto bad_fork_cleanup_policy; 1163 goto bad_fork_cleanup_policy;
1164 /* copy all the process information */ 1164 /* copy all the process information */
1165 if ((retval = copy_semundo(clone_flags, p))) 1165 if ((retval = copy_semundo(clone_flags, p)))
1166 goto bad_fork_cleanup_audit; 1166 goto bad_fork_cleanup_audit;
1167 if ((retval = copy_files(clone_flags, p))) 1167 if ((retval = copy_files(clone_flags, p)))
1168 goto bad_fork_cleanup_semundo; 1168 goto bad_fork_cleanup_semundo;
1169 if ((retval = copy_fs(clone_flags, p))) 1169 if ((retval = copy_fs(clone_flags, p)))
1170 goto bad_fork_cleanup_files; 1170 goto bad_fork_cleanup_files;
1171 if ((retval = copy_sighand(clone_flags, p))) 1171 if ((retval = copy_sighand(clone_flags, p)))
1172 goto bad_fork_cleanup_fs; 1172 goto bad_fork_cleanup_fs;
1173 if ((retval = copy_signal(clone_flags, p))) 1173 if ((retval = copy_signal(clone_flags, p)))
1174 goto bad_fork_cleanup_sighand; 1174 goto bad_fork_cleanup_sighand;
1175 if ((retval = copy_mm(clone_flags, p))) 1175 if ((retval = copy_mm(clone_flags, p)))
1176 goto bad_fork_cleanup_signal; 1176 goto bad_fork_cleanup_signal;
1177 if ((retval = copy_namespaces(clone_flags, p))) 1177 if ((retval = copy_namespaces(clone_flags, p)))
1178 goto bad_fork_cleanup_mm; 1178 goto bad_fork_cleanup_mm;
1179 if ((retval = copy_io(clone_flags, p))) 1179 if ((retval = copy_io(clone_flags, p)))
1180 goto bad_fork_cleanup_namespaces; 1180 goto bad_fork_cleanup_namespaces;
1181 retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); 1181 retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
1182 if (retval) 1182 if (retval)
1183 goto bad_fork_cleanup_io; 1183 goto bad_fork_cleanup_io;
1184 1184
1185 if (pid != &init_struct_pid) { 1185 if (pid != &init_struct_pid) {
1186 retval = -ENOMEM; 1186 retval = -ENOMEM;
1187 pid = alloc_pid(p->nsproxy->pid_ns); 1187 pid = alloc_pid(p->nsproxy->pid_ns);
1188 if (!pid) 1188 if (!pid)
1189 goto bad_fork_cleanup_io; 1189 goto bad_fork_cleanup_io;
1190 1190
1191 if (clone_flags & CLONE_NEWPID) { 1191 if (clone_flags & CLONE_NEWPID) {
1192 retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); 1192 retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
1193 if (retval < 0) 1193 if (retval < 0)
1194 goto bad_fork_free_pid; 1194 goto bad_fork_free_pid;
1195 } 1195 }
1196 } 1196 }
1197 1197
1198 p->pid = pid_nr(pid); 1198 p->pid = pid_nr(pid);
1199 p->tgid = p->pid; 1199 p->tgid = p->pid;
1200 if (clone_flags & CLONE_THREAD) 1200 if (clone_flags & CLONE_THREAD)
1201 p->tgid = current->tgid; 1201 p->tgid = current->tgid;
1202 1202
1203 if (current->nsproxy != p->nsproxy) { 1203 if (current->nsproxy != p->nsproxy) {
1204 retval = ns_cgroup_clone(p, pid); 1204 retval = ns_cgroup_clone(p, pid);
1205 if (retval) 1205 if (retval)
1206 goto bad_fork_free_pid; 1206 goto bad_fork_free_pid;
1207 } 1207 }
1208 1208
1209 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1209 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1210 /* 1210 /*
1211 * Clear TID on mm_release()? 1211 * Clear TID on mm_release()?
1212 */ 1212 */
1213 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1213 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1214 #ifdef CONFIG_FUTEX 1214 #ifdef CONFIG_FUTEX
1215 p->robust_list = NULL; 1215 p->robust_list = NULL;
1216 #ifdef CONFIG_COMPAT 1216 #ifdef CONFIG_COMPAT
1217 p->compat_robust_list = NULL; 1217 p->compat_robust_list = NULL;
1218 #endif 1218 #endif
1219 INIT_LIST_HEAD(&p->pi_state_list); 1219 INIT_LIST_HEAD(&p->pi_state_list);
1220 p->pi_state_cache = NULL; 1220 p->pi_state_cache = NULL;
1221 #endif 1221 #endif
1222 /* 1222 /*
1223 * sigaltstack should be cleared when sharing the same VM 1223 * sigaltstack should be cleared when sharing the same VM
1224 */ 1224 */
1225 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) 1225 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1226 p->sas_ss_sp = p->sas_ss_size = 0; 1226 p->sas_ss_sp = p->sas_ss_size = 0;
1227 1227
1228 /* 1228 /*
1229 * Syscall tracing and stepping should be turned off in the 1229 * Syscall tracing and stepping should be turned off in the
1230 * child regardless of CLONE_PTRACE. 1230 * child regardless of CLONE_PTRACE.
1231 */ 1231 */
1232 user_disable_single_step(p); 1232 user_disable_single_step(p);
1233 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 1233 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1234 #ifdef TIF_SYSCALL_EMU 1234 #ifdef TIF_SYSCALL_EMU
1235 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 1235 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
1236 #endif 1236 #endif
1237 clear_all_latency_tracing(p); 1237 clear_all_latency_tracing(p);
1238 1238
1239 /* ok, now we should be set up.. */ 1239 /* ok, now we should be set up.. */
1240 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); 1240 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
1241 p->pdeath_signal = 0; 1241 p->pdeath_signal = 0;
1242 p->exit_state = 0; 1242 p->exit_state = 0;
1243 1243
1244 /* 1244 /*
1245 * Ok, make it visible to the rest of the system. 1245 * Ok, make it visible to the rest of the system.
1246 * We don't wake it up yet. 1246 * We don't wake it up yet.
1247 */ 1247 */
1248 p->group_leader = p; 1248 p->group_leader = p;
1249 INIT_LIST_HEAD(&p->thread_group); 1249 INIT_LIST_HEAD(&p->thread_group);
1250 1250
1251 /* Now that the task is set up, run cgroup callbacks if 1251 /* Now that the task is set up, run cgroup callbacks if
1252 * necessary. We need to run them before the task is visible 1252 * necessary. We need to run them before the task is visible
1253 * on the tasklist. */ 1253 * on the tasklist. */
1254 cgroup_fork_callbacks(p); 1254 cgroup_fork_callbacks(p);
1255 cgroup_callbacks_done = 1; 1255 cgroup_callbacks_done = 1;
1256 1256
1257 /* Need tasklist lock for parent etc handling! */ 1257 /* Need tasklist lock for parent etc handling! */
1258 write_lock_irq(&tasklist_lock); 1258 write_lock_irq(&tasklist_lock);
1259 1259
1260 /* CLONE_PARENT re-uses the old parent */ 1260 /* CLONE_PARENT re-uses the old parent */
1261 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { 1261 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1262 p->real_parent = current->real_parent; 1262 p->real_parent = current->real_parent;
1263 p->parent_exec_id = current->parent_exec_id; 1263 p->parent_exec_id = current->parent_exec_id;
1264 } else { 1264 } else {
1265 p->real_parent = current; 1265 p->real_parent = current;
1266 p->parent_exec_id = current->self_exec_id; 1266 p->parent_exec_id = current->self_exec_id;
1267 } 1267 }
1268 1268
1269 spin_lock(&current->sighand->siglock); 1269 spin_lock(&current->sighand->siglock);
1270 1270
1271 /* 1271 /*
1272 * Process group and session signals need to be delivered to just the 1272 * Process group and session signals need to be delivered to just the
1273 * parent before the fork or both the parent and the child after the 1273 * parent before the fork or both the parent and the child after the
1274 * fork. Restart if a signal comes in before we add the new process to 1274 * fork. Restart if a signal comes in before we add the new process to
1275 * its process group. 1275 * its process group.
1276 * A fatal signal pending means that current will exit, so the new 1276 * A fatal signal pending means that current will exit, so the new
1277 * thread can't slip out of an OOM kill (or normal SIGKILL). 1277 * thread can't slip out of an OOM kill (or normal SIGKILL).
1278 */ 1278 */
1279 recalc_sigpending(); 1279 recalc_sigpending();
1280 if (signal_pending(current)) { 1280 if (signal_pending(current)) {
1281 spin_unlock(&current->sighand->siglock); 1281 spin_unlock(&current->sighand->siglock);
1282 write_unlock_irq(&tasklist_lock); 1282 write_unlock_irq(&tasklist_lock);
1283 retval = -ERESTARTNOINTR; 1283 retval = -ERESTARTNOINTR;
1284 goto bad_fork_free_pid; 1284 goto bad_fork_free_pid;
1285 } 1285 }
1286 1286
1287 if (clone_flags & CLONE_THREAD) { 1287 if (clone_flags & CLONE_THREAD) {
1288 current->signal->nr_threads++; 1288 current->signal->nr_threads++;
1289 atomic_inc(&current->signal->live); 1289 atomic_inc(&current->signal->live);
1290 atomic_inc(&current->signal->sigcnt); 1290 atomic_inc(&current->signal->sigcnt);
1291 p->group_leader = current->group_leader; 1291 p->group_leader = current->group_leader;
1292 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); 1292 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1293 } 1293 }
1294 1294
1295 if (likely(p->pid)) { 1295 if (likely(p->pid)) {
1296 tracehook_finish_clone(p, clone_flags, trace); 1296 tracehook_finish_clone(p, clone_flags, trace);
1297 1297
1298 if (thread_group_leader(p)) { 1298 if (thread_group_leader(p)) {
1299 if (clone_flags & CLONE_NEWPID) 1299 if (clone_flags & CLONE_NEWPID)
1300 p->nsproxy->pid_ns->child_reaper = p; 1300 p->nsproxy->pid_ns->child_reaper = p;
1301 1301
1302 p->signal->leader_pid = pid; 1302 p->signal->leader_pid = pid;
1303 p->signal->tty = tty_kref_get(current->signal->tty); 1303 p->signal->tty = tty_kref_get(current->signal->tty);
1304 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1304 attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
1305 attach_pid(p, PIDTYPE_SID, task_session(current)); 1305 attach_pid(p, PIDTYPE_SID, task_session(current));
1306 list_add_tail(&p->sibling, &p->real_parent->children); 1306 list_add_tail(&p->sibling, &p->real_parent->children);
1307 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1307 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1308 __this_cpu_inc(process_counts); 1308 __this_cpu_inc(process_counts);
1309 } 1309 }
1310 attach_pid(p, PIDTYPE_PID, pid); 1310 attach_pid(p, PIDTYPE_PID, pid);
1311 nr_threads++; 1311 nr_threads++;
1312 } 1312 }
1313 1313
1314 total_forks++; 1314 total_forks++;
1315 spin_unlock(&current->sighand->siglock); 1315 spin_unlock(&current->sighand->siglock);
1316 write_unlock_irq(&tasklist_lock); 1316 write_unlock_irq(&tasklist_lock);
1317 proc_fork_connector(p); 1317 proc_fork_connector(p);
1318 cgroup_post_fork(p); 1318 cgroup_post_fork(p);
1319 perf_event_fork(p); 1319 perf_event_fork(p);
1320 return p; 1320 return p;
1321 1321
1322 bad_fork_free_pid: 1322 bad_fork_free_pid:
1323 if (pid != &init_struct_pid) 1323 if (pid != &init_struct_pid)
1324 free_pid(pid); 1324 free_pid(pid);
1325 bad_fork_cleanup_io: 1325 bad_fork_cleanup_io:
1326 if (p->io_context) 1326 if (p->io_context)
1327 exit_io_context(p); 1327 exit_io_context(p);
1328 bad_fork_cleanup_namespaces: 1328 bad_fork_cleanup_namespaces:
1329 exit_task_namespaces(p); 1329 exit_task_namespaces(p);
1330 bad_fork_cleanup_mm: 1330 bad_fork_cleanup_mm:
1331 if (p->mm) { 1331 if (p->mm) {
1332 task_lock(p); 1332 task_lock(p);
1333 if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) 1333 if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1334 atomic_dec(&p->mm->oom_disable_count); 1334 atomic_dec(&p->mm->oom_disable_count);
1335 task_unlock(p); 1335 task_unlock(p);
1336 mmput(p->mm); 1336 mmput(p->mm);
1337 } 1337 }
1338 bad_fork_cleanup_signal: 1338 bad_fork_cleanup_signal:
1339 if (!(clone_flags & CLONE_THREAD)) 1339 if (!(clone_flags & CLONE_THREAD))
1340 free_signal_struct(p->signal); 1340 free_signal_struct(p->signal);
1341 bad_fork_cleanup_sighand: 1341 bad_fork_cleanup_sighand:
1342 __cleanup_sighand(p->sighand); 1342 __cleanup_sighand(p->sighand);
1343 bad_fork_cleanup_fs: 1343 bad_fork_cleanup_fs:
1344 exit_fs(p); /* blocking */ 1344 exit_fs(p); /* blocking */
1345 bad_fork_cleanup_files: 1345 bad_fork_cleanup_files:
1346 exit_files(p); /* blocking */ 1346 exit_files(p); /* blocking */
1347 bad_fork_cleanup_semundo: 1347 bad_fork_cleanup_semundo:
1348 exit_sem(p); 1348 exit_sem(p);
1349 bad_fork_cleanup_audit: 1349 bad_fork_cleanup_audit:
1350 audit_free(p); 1350 audit_free(p);
1351 bad_fork_cleanup_policy: 1351 bad_fork_cleanup_policy:
1352 perf_event_free_task(p); 1352 perf_event_free_task(p);
1353 #ifdef CONFIG_NUMA 1353 #ifdef CONFIG_NUMA
1354 mpol_put(p->mempolicy); 1354 mpol_put(p->mempolicy);
1355 bad_fork_cleanup_cgroup: 1355 bad_fork_cleanup_cgroup:
1356 #endif 1356 #endif
1357 cgroup_exit(p, cgroup_callbacks_done); 1357 cgroup_exit(p, cgroup_callbacks_done);
1358 delayacct_tsk_free(p); 1358 delayacct_tsk_free(p);
1359 module_put(task_thread_info(p)->exec_domain->module); 1359 module_put(task_thread_info(p)->exec_domain->module);
1360 bad_fork_cleanup_count: 1360 bad_fork_cleanup_count:
1361 atomic_dec(&p->cred->user->processes); 1361 atomic_dec(&p->cred->user->processes);
1362 exit_creds(p); 1362 exit_creds(p);
1363 bad_fork_free: 1363 bad_fork_free:
1364 free_task(p); 1364 free_task(p);
1365 fork_out: 1365 fork_out:
1366 return ERR_PTR(retval); 1366 return ERR_PTR(retval);
1367 } 1367 }
1368 1368
1369 noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs) 1369 noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1370 { 1370 {
1371 memset(regs, 0, sizeof(struct pt_regs)); 1371 memset(regs, 0, sizeof(struct pt_regs));
1372 return regs; 1372 return regs;
1373 } 1373 }
1374 1374
1375 static inline void init_idle_pids(struct pid_link *links) 1375 static inline void init_idle_pids(struct pid_link *links)
1376 { 1376 {
1377 enum pid_type type; 1377 enum pid_type type;
1378 1378
1379 for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { 1379 for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
1380 INIT_HLIST_NODE(&links[type].node); /* not really needed */ 1380 INIT_HLIST_NODE(&links[type].node); /* not really needed */
1381 links[type].pid = &init_struct_pid; 1381 links[type].pid = &init_struct_pid;
1382 } 1382 }
1383 } 1383 }
1384 1384
1385 struct task_struct * __cpuinit fork_idle(int cpu) 1385 struct task_struct * __cpuinit fork_idle(int cpu)
1386 { 1386 {
1387 struct task_struct *task; 1387 struct task_struct *task;
1388 struct pt_regs regs; 1388 struct pt_regs regs;
1389 1389
1390 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1390 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1391 &init_struct_pid, 0); 1391 &init_struct_pid, 0);
1392 if (!IS_ERR(task)) { 1392 if (!IS_ERR(task)) {
1393 init_idle_pids(task->pids); 1393 init_idle_pids(task->pids);
1394 init_idle(task, cpu); 1394 init_idle(task, cpu);
1395 } 1395 }
1396 1396
1397 return task; 1397 return task;
1398 } 1398 }
1399 1399
1400 /* 1400 /*
1401 * Ok, this is the main fork-routine. 1401 * Ok, this is the main fork-routine.
1402 * 1402 *
1403 * It copies the process, and if successful kick-starts 1403 * It copies the process, and if successful kick-starts
1404 * it and waits for it to finish using the VM if required. 1404 * it and waits for it to finish using the VM if required.
1405 */ 1405 */
1406 long do_fork(unsigned long clone_flags, 1406 long do_fork(unsigned long clone_flags,
1407 unsigned long stack_start, 1407 unsigned long stack_start,
1408 struct pt_regs *regs, 1408 struct pt_regs *regs,
1409 unsigned long stack_size, 1409 unsigned long stack_size,
1410 int __user *parent_tidptr, 1410 int __user *parent_tidptr,
1411 int __user *child_tidptr) 1411 int __user *child_tidptr)
1412 { 1412 {
1413 struct task_struct *p; 1413 struct task_struct *p;
1414 int trace = 0; 1414 int trace = 0;
1415 long nr; 1415 long nr;
1416 1416
1417 /* 1417 /*
1418 * Do some preliminary argument and permissions checking before we 1418 * Do some preliminary argument and permissions checking before we
1419 * actually start allocating stuff 1419 * actually start allocating stuff
1420 */ 1420 */
1421 if (clone_flags & CLONE_NEWUSER) { 1421 if (clone_flags & CLONE_NEWUSER) {
1422 if (clone_flags & CLONE_THREAD) 1422 if (clone_flags & CLONE_THREAD)
1423 return -EINVAL; 1423 return -EINVAL;
1424 /* hopefully this check will go away when userns support is 1424 /* hopefully this check will go away when userns support is
1425 * complete 1425 * complete
1426 */ 1426 */
1427 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || 1427 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
1428 !capable(CAP_SETGID)) 1428 !capable(CAP_SETGID))
1429 return -EPERM; 1429 return -EPERM;
1430 } 1430 }
1431 1431
1432 /* 1432 /*
1433 * When called from kernel_thread, don't do user tracing stuff. 1433 * When called from kernel_thread, don't do user tracing stuff.
1434 */ 1434 */
1435 if (likely(user_mode(regs))) 1435 if (likely(user_mode(regs)))
1436 trace = tracehook_prepare_clone(clone_flags); 1436 trace = tracehook_prepare_clone(clone_flags);
1437 1437
1438 p = copy_process(clone_flags, stack_start, regs, stack_size, 1438 p = copy_process(clone_flags, stack_start, regs, stack_size,
1439 child_tidptr, NULL, trace); 1439 child_tidptr, NULL, trace);
1440 /* 1440 /*
1441 * Do this prior waking up the new thread - the thread pointer 1441 * Do this prior waking up the new thread - the thread pointer
1442 * might get invalid after that point, if the thread exits quickly. 1442 * might get invalid after that point, if the thread exits quickly.
1443 */ 1443 */
1444 if (!IS_ERR(p)) { 1444 if (!IS_ERR(p)) {
1445 struct completion vfork; 1445 struct completion vfork;
1446 1446
1447 trace_sched_process_fork(current, p); 1447 trace_sched_process_fork(current, p);
1448 1448
1449 nr = task_pid_vnr(p); 1449 nr = task_pid_vnr(p);
1450 1450
1451 if (clone_flags & CLONE_PARENT_SETTID) 1451 if (clone_flags & CLONE_PARENT_SETTID)
1452 put_user(nr, parent_tidptr); 1452 put_user(nr, parent_tidptr);
1453 1453
1454 if (clone_flags & CLONE_VFORK) { 1454 if (clone_flags & CLONE_VFORK) {
1455 p->vfork_done = &vfork; 1455 p->vfork_done = &vfork;
1456 init_completion(&vfork); 1456 init_completion(&vfork);
1457 } 1457 }
1458 1458
1459 audit_finish_fork(p); 1459 audit_finish_fork(p);
1460 tracehook_report_clone(regs, clone_flags, nr, p); 1460 tracehook_report_clone(regs, clone_flags, nr, p);
1461 1461
1462 /* 1462 /*
1463 * We set PF_STARTING at creation in case tracing wants to 1463 * We set PF_STARTING at creation in case tracing wants to
1464 * use this to distinguish a fully live task from one that 1464 * use this to distinguish a fully live task from one that
1465 * hasn't gotten to tracehook_report_clone() yet. Now we 1465 * hasn't gotten to tracehook_report_clone() yet. Now we
1466 * clear it and set the child going. 1466 * clear it and set the child going.
1467 */ 1467 */
1468 p->flags &= ~PF_STARTING; 1468 p->flags &= ~PF_STARTING;
1469 1469
1470 wake_up_new_task(p, clone_flags); 1470 wake_up_new_task(p, clone_flags);
1471 1471
1472 tracehook_report_clone_complete(trace, regs, 1472 tracehook_report_clone_complete(trace, regs,
1473 clone_flags, nr, p); 1473 clone_flags, nr, p);
1474 1474
1475 if (clone_flags & CLONE_VFORK) { 1475 if (clone_flags & CLONE_VFORK) {
1476 freezer_do_not_count(); 1476 freezer_do_not_count();
1477 wait_for_completion(&vfork); 1477 wait_for_completion(&vfork);
1478 freezer_count(); 1478 freezer_count();
1479 tracehook_report_vfork_done(p, nr); 1479 tracehook_report_vfork_done(p, nr);
1480 } 1480 }
1481 } else { 1481 } else {
1482 nr = PTR_ERR(p); 1482 nr = PTR_ERR(p);
1483 } 1483 }
1484 return nr; 1484 return nr;
1485 } 1485 }
1486 1486
/*
 * Fallback for when ARCH_MIN_MMSTRUCT_ALIGN has not been defined already:
 * 0 is then passed as the alignment argument of the "mm_struct" cache
 * created in proc_caches_init().
 */
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
1490 1490
1491 static void sighand_ctor(void *data) 1491 static void sighand_ctor(void *data)
1492 { 1492 {
1493 struct sighand_struct *sighand = data; 1493 struct sighand_struct *sighand = data;
1494 1494
1495 spin_lock_init(&sighand->siglock); 1495 spin_lock_init(&sighand->siglock);
1496 init_waitqueue_head(&sighand->signalfd_wqh); 1496 init_waitqueue_head(&sighand->signalfd_wqh);
1497 } 1497 }
1498 1498
1499 void __init proc_caches_init(void) 1499 void __init proc_caches_init(void)
1500 { 1500 {
1501 sighand_cachep = kmem_cache_create("sighand_cache", 1501 sighand_cachep = kmem_cache_create("sighand_cache",
1502 sizeof(struct sighand_struct), 0, 1502 sizeof(struct sighand_struct), 0,
1503 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| 1503 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
1504 SLAB_NOTRACK, sighand_ctor); 1504 SLAB_NOTRACK, sighand_ctor);
1505 signal_cachep = kmem_cache_create("signal_cache", 1505 signal_cachep = kmem_cache_create("signal_cache",
1506 sizeof(struct signal_struct), 0, 1506 sizeof(struct signal_struct), 0,
1507 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1507 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1508 files_cachep = kmem_cache_create("files_cache", 1508 files_cachep = kmem_cache_create("files_cache",
1509 sizeof(struct files_struct), 0, 1509 sizeof(struct files_struct), 0,
1510 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1510 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1511 fs_cachep = kmem_cache_create("fs_cache", 1511 fs_cachep = kmem_cache_create("fs_cache",
1512 sizeof(struct fs_struct), 0, 1512 sizeof(struct fs_struct), 0,
1513 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1513 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1514 mm_cachep = kmem_cache_create("mm_struct", 1514 mm_cachep = kmem_cache_create("mm_struct",
1515 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1515 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1516 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1516 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1517 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); 1517 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
1518 mmap_init(); 1518 mmap_init();
1519 } 1519 }
1520 1520
1521 /* 1521 /*
1522 * Check constraints on flags passed to the unshare system call and 1522 * Check constraints on flags passed to the unshare system call.
1523 * force unsharing of additional process context as appropriate.
1524 */ 1523 */
1525 static void check_unshare_flags(unsigned long *flags_ptr) 1524 static int check_unshare_flags(unsigned long unshare_flags)
1526 { 1525 {
1526 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1527 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1528 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
1529 return -EINVAL;
1527 /* 1530 /*
1528 * If unsharing a thread from a thread group, must also 1531 * Not implemented, but pretend it works if there is nothing to
1529 * unshare vm. 1532 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
1533 * needs to unshare vm.
1530 */ 1534 */
1531 if (*flags_ptr & CLONE_THREAD) 1535 if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
1532 *flags_ptr |= CLONE_VM; 1536 /* FIXME: get_task_mm() increments ->mm_users */
1537 if (atomic_read(&current->mm->mm_users) > 1)
1538 return -EINVAL;
1539 }
1533 1540
1534 /*
1535 * If unsharing vm, must also unshare signal handlers.
1536 */
1537 if (*flags_ptr & CLONE_VM)
1538 *flags_ptr |= CLONE_SIGHAND;
1539
1540 /*
1541 * If unsharing namespace, must also unshare filesystem information.
1542 */
1543 if (*flags_ptr & CLONE_NEWNS)
1544 *flags_ptr |= CLONE_FS;
1545 }
1546
1547 /*
1548 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1549 */
1550 static int unshare_thread(unsigned long unshare_flags)
1551 {
1552 if (unshare_flags & CLONE_THREAD)
1553 return -EINVAL;
1554
1555 return 0; 1541 return 0;
1556 } 1542 }
1557 1543
1558 /* 1544 /*
1559 * Unshare the filesystem structure if it is being shared 1545 * Unshare the filesystem structure if it is being shared
1560 */ 1546 */
1561 static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) 1547 static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1562 { 1548 {
1563 struct fs_struct *fs = current->fs; 1549 struct fs_struct *fs = current->fs;
1564 1550
1565 if (!(unshare_flags & CLONE_FS) || !fs) 1551 if (!(unshare_flags & CLONE_FS) || !fs)
1566 return 0; 1552 return 0;
1567 1553
1568 /* don't need lock here; in the worst case we'll do useless copy */ 1554 /* don't need lock here; in the worst case we'll do useless copy */
1569 if (fs->users == 1) 1555 if (fs->users == 1)
1570 return 0; 1556 return 0;
1571 1557
1572 *new_fsp = copy_fs_struct(fs); 1558 *new_fsp = copy_fs_struct(fs);
1573 if (!*new_fsp) 1559 if (!*new_fsp)
1574 return -ENOMEM; 1560 return -ENOMEM;
1575 1561
1576 return 0; 1562 return 0;
1577 } 1563 }
1578 1564
1579 /* 1565 /*
1580 * Unsharing of sighand is not supported yet
1581 */
1582 static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1583 {
1584 struct sighand_struct *sigh = current->sighand;
1585
1586 if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
1587 return -EINVAL;
1588 else
1589 return 0;
1590 }
1591
1592 /*
1593 * Unshare vm if it is being shared
1594 */
1595 static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1596 {
1597 struct mm_struct *mm = current->mm;
1598
1599 if ((unshare_flags & CLONE_VM) &&
1600 (mm && atomic_read(&mm->mm_users) > 1)) {
1601 return -EINVAL;
1602 }
1603
1604 return 0;
1605 }
1606
1607 /*
1608 * Unshare file descriptor table if it is being shared 1566 * Unshare file descriptor table if it is being shared
1609 */ 1567 */
1610 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) 1568 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1611 { 1569 {
1612 struct files_struct *fd = current->files; 1570 struct files_struct *fd = current->files;
1613 int error = 0; 1571 int error = 0;
1614 1572
1615 if ((unshare_flags & CLONE_FILES) && 1573 if ((unshare_flags & CLONE_FILES) &&
1616 (fd && atomic_read(&fd->count) > 1)) { 1574 (fd && atomic_read(&fd->count) > 1)) {
1617 *new_fdp = dup_fd(fd, &error); 1575 *new_fdp = dup_fd(fd, &error);
1618 if (!*new_fdp) 1576 if (!*new_fdp)
1619 return error; 1577 return error;
1620 } 1578 }
1621 1579
1622 return 0; 1580 return 0;
1623 } 1581 }
1624 1582
1625 /* 1583 /*
1626 * unshare allows a process to 'unshare' part of the process 1584 * unshare allows a process to 'unshare' part of the process
1627 * context which was originally shared using clone. copy_* 1585 * context which was originally shared using clone. copy_*
1628 * functions used by do_fork() cannot be used here directly 1586 * functions used by do_fork() cannot be used here directly
1629 * because they modify an inactive task_struct that is being 1587 * because they modify an inactive task_struct that is being
1630 * constructed. Here we are modifying the current, active, 1588 * constructed. Here we are modifying the current, active,
1631 * task_struct. 1589 * task_struct.
1632 */ 1590 */
1633 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) 1591 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1634 { 1592 {
1635 int err = 0;
1636 struct fs_struct *fs, *new_fs = NULL; 1593 struct fs_struct *fs, *new_fs = NULL;
1637 struct sighand_struct *new_sigh = NULL;
1638 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1639 struct files_struct *fd, *new_fd = NULL; 1594 struct files_struct *fd, *new_fd = NULL;
1640 struct nsproxy *new_nsproxy = NULL; 1595 struct nsproxy *new_nsproxy = NULL;
1641 int do_sysvsem = 0; 1596 int do_sysvsem = 0;
1597 int err;
1642 1598
1643 check_unshare_flags(&unshare_flags); 1599 err = check_unshare_flags(unshare_flags);
1644 1600 if (err)
1645 /* Return -EINVAL for all unsupported flags */
1646 err = -EINVAL;
1647 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1648 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1649 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
1650 goto bad_unshare_out; 1601 goto bad_unshare_out;
1651 1602
1652 /* 1603 /*
1604 * If unsharing namespace, must also unshare filesystem information.
1605 */
1606 if (unshare_flags & CLONE_NEWNS)
1607 unshare_flags |= CLONE_FS;
1608 /*
1653 * CLONE_NEWIPC must also detach from the undolist: after switching 1609 * CLONE_NEWIPC must also detach from the undolist: after switching
1654 * to a new ipc namespace, the semaphore arrays from the old 1610 * to a new ipc namespace, the semaphore arrays from the old
1655 * namespace are unreachable. 1611 * namespace are unreachable.
1656 */ 1612 */
1657 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) 1613 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
1658 do_sysvsem = 1; 1614 do_sysvsem = 1;
1659 if ((err = unshare_thread(unshare_flags)))
1660 goto bad_unshare_out;
1661 if ((err = unshare_fs(unshare_flags, &new_fs))) 1615 if ((err = unshare_fs(unshare_flags, &new_fs)))
1662 goto bad_unshare_cleanup_thread; 1616 goto bad_unshare_out;
1663 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1664 goto bad_unshare_cleanup_fs;
1665 if ((err = unshare_vm(unshare_flags, &new_mm)))
1666 goto bad_unshare_cleanup_sigh;
1667 if ((err = unshare_fd(unshare_flags, &new_fd))) 1617 if ((err = unshare_fd(unshare_flags, &new_fd)))
1668 goto bad_unshare_cleanup_vm; 1618 goto bad_unshare_cleanup_fs;
1669 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, 1619 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1670 new_fs))) 1620 new_fs)))
1671 goto bad_unshare_cleanup_fd; 1621 goto bad_unshare_cleanup_fd;
1672 1622
1673 if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { 1623 if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
1674 if (do_sysvsem) { 1624 if (do_sysvsem) {
1675 /* 1625 /*
1676 * CLONE_SYSVSEM is equivalent to sys_exit(). 1626 * CLONE_SYSVSEM is equivalent to sys_exit().
1677 */ 1627 */
1678 exit_sem(current); 1628 exit_sem(current);
1679 } 1629 }
1680 1630
1681 if (new_nsproxy) { 1631 if (new_nsproxy) {
1682 switch_task_namespaces(current, new_nsproxy); 1632 switch_task_namespaces(current, new_nsproxy);
1683 new_nsproxy = NULL; 1633 new_nsproxy = NULL;
1684 } 1634 }
1685 1635
1686 task_lock(current); 1636 task_lock(current);
1687 1637
1688 if (new_fs) { 1638 if (new_fs) {
1689 fs = current->fs; 1639 fs = current->fs;
1690 spin_lock(&fs->lock); 1640 spin_lock(&fs->lock);
1691 current->fs = new_fs; 1641 current->fs = new_fs;
1692 if (--fs->users) 1642 if (--fs->users)
1693 new_fs = NULL; 1643 new_fs = NULL;
1694 else 1644 else
1695 new_fs = fs; 1645 new_fs = fs;
1696 spin_unlock(&fs->lock); 1646 spin_unlock(&fs->lock);
1697 } 1647 }
1698 1648
1699 if (new_mm) {
1700 mm = current->mm;
1701 active_mm = current->active_mm;
1702 current->mm = new_mm;
1703 current->active_mm = new_mm;
1704 if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
1705 atomic_dec(&mm->oom_disable_count);
1706 atomic_inc(&new_mm->oom_disable_count);
1707 }
1708 activate_mm(active_mm, new_mm);
1709 new_mm = mm;
1710 }
1711
1712 if (new_fd) { 1649 if (new_fd) {
1713 fd = current->files; 1650 fd = current->files;
1714 current->files = new_fd; 1651 current->files = new_fd;
1715 new_fd = fd; 1652 new_fd = fd;
1716 } 1653 }
1717 1654
1718 task_unlock(current); 1655 task_unlock(current);
1719 } 1656 }
1720 1657
1721 if (new_nsproxy) 1658 if (new_nsproxy)
1722 put_nsproxy(new_nsproxy); 1659 put_nsproxy(new_nsproxy);
1723 1660
1724 bad_unshare_cleanup_fd: 1661 bad_unshare_cleanup_fd:
1725 if (new_fd) 1662 if (new_fd)
1726 put_files_struct(new_fd); 1663 put_files_struct(new_fd);
1727 1664
1728 bad_unshare_cleanup_vm:
1729 if (new_mm)
1730 mmput(new_mm);
1731
1732 bad_unshare_cleanup_sigh:
1733 if (new_sigh)
1734 if (atomic_dec_and_test(&new_sigh->count))
1735 kmem_cache_free(sighand_cachep, new_sigh);
1736
1737 bad_unshare_cleanup_fs: 1665 bad_unshare_cleanup_fs:
1738 if (new_fs) 1666 if (new_fs)
1739 free_fs_struct(new_fs); 1667 free_fs_struct(new_fs);
1740 1668
1741 bad_unshare_cleanup_thread:
1742 bad_unshare_out: 1669 bad_unshare_out:
1743 return err; 1670 return err;
1744 } 1671 }
1745 1672
1746 /* 1673 /*
1747 * Helper to unshare the files of the current task. 1674 * Helper to unshare the files of the current task.
1748 * We don't want to expose copy_files internals to 1675 * We don't want to expose copy_files internals to
1749 * the exec layer of the kernel. 1676 * the exec layer of the kernel.
1750 */ 1677 */
1751 1678
1752 int unshare_files(struct files_struct **displaced) 1679 int unshare_files(struct files_struct **displaced)
1753 { 1680 {
1754 struct task_struct *task = current; 1681 struct task_struct *task = current;
1755 struct files_struct *copy = NULL; 1682 struct files_struct *copy = NULL;