Blame view

fs/exec.c 46.6 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
  /*
   *  linux/fs/exec.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   * #!-checking implemented by tytso.
   */
  /*
   * Demand-loading implemented 01.12.91 - no need to read anything but
   * the header into memory. The inode of the executable is put into
   * "current->executable", and page faults do the actual loading. Clean.
   *
   * Once more I can proudly say that linux stood up to being changed: it
   * was less than 2 hours work to get demand-loading completely implemented.
   *
   * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
   * current->executable is only used by the procfs.  This allows a dispatch
   * table to check for several different types  of binary formats.  We keep
   * trying until we recognize the file or we run out of supported binary
   * formats. 
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
  #include <linux/slab.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
26
  #include <linux/fdtable.h>
ba92a43db   Hugh Dickins   exec: remove some...
27
  #include <linux/mm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
  #include <linux/stat.h>
  #include <linux/fcntl.h>
ba92a43db   Hugh Dickins   exec: remove some...
30
  #include <linux/swap.h>
74aadce98   Neil Horman   core_pattern: all...
31
  #include <linux/string.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  #include <linux/init.h>
ca5b172bd   Hugh Dickins   exec: include pag...
33
  #include <linux/pagemap.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
34
  #include <linux/perf_event.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
36
37
38
39
  #include <linux/highmem.h>
  #include <linux/spinlock.h>
  #include <linux/key.h>
  #include <linux/personality.h>
  #include <linux/binfmts.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  #include <linux/utsname.h>
84d737866   Sukadev Bhattiprolu   [PATCH] add child...
41
  #include <linux/pid_namespace.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
  #include <linux/module.h>
  #include <linux/namei.h>
  #include <linux/proc_fs.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
  #include <linux/mount.h>
  #include <linux/security.h>
  #include <linux/syscalls.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
48
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
49
  #include <linux/cn_proc.h>
473ae30bc   Al Viro   [PATCH] execve ar...
50
  #include <linux/audit.h>
6341c393f   Roland McGrath   tracehook: exec
51
  #include <linux/tracehook.h>
5f4123be3   Johannes Berg   remove CONFIG_KMO...
52
  #include <linux/kmod.h>
6110e3abb   Eric Paris   sys_execve and sy...
53
  #include <linux/fsnotify.h>
5ad4e53bd   Al Viro   Get rid of indire...
54
  #include <linux/fs_struct.h>
61be228a0   Neil Horman   exec: allow do_co...
55
  #include <linux/pipe_fs_i.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56
57
58
  
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
b6a2fea39   Ollie Wild   mm: variable leng...
59
  #include <asm/tlb.h>
a6f76f23d   David Howells   CRED: Make execve...
60
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
61

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
  int core_uses_pid;
71ce92f3f   Dan Aloni   make sysctl/kerne...
63
  char core_pattern[CORENAME_MAX_SIZE] = "core";
a293980c2   Neil Horman   exec: let do_core...
64
  unsigned int core_pipe_limit;
d6e711448   Alan Cox   [PATCH] setuid co...
65
  int suid_dumpable = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
66
  /* The maximal length of core_pattern is also specified in sysctl.c */
e4dc1b14d   Alexey Dobriyan   Use list_head in ...
67
  static LIST_HEAD(formats);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
68
  static DEFINE_RWLOCK(binfmt_lock);
74641f584   Ivan Kokshaysky   alpha: binfmt_aou...
69
  int __register_binfmt(struct linux_binfmt * fmt, int insert)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
70
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
71
72
  	if (!fmt)
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
  	write_lock(&binfmt_lock);
74641f584   Ivan Kokshaysky   alpha: binfmt_aou...
74
75
  	insert ? list_add(&fmt->lh, &formats) :
  		 list_add_tail(&fmt->lh, &formats);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
76
77
78
  	write_unlock(&binfmt_lock);
  	return 0;	
  }
74641f584   Ivan Kokshaysky   alpha: binfmt_aou...
79
  EXPORT_SYMBOL(__register_binfmt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80

f6b450d48   Alexey Dobriyan   Make unregister_b...
81
  void unregister_binfmt(struct linux_binfmt * fmt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83
  	write_lock(&binfmt_lock);
e4dc1b14d   Alexey Dobriyan   Use list_head in ...
84
  	list_del(&fmt->lh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85
  	write_unlock(&binfmt_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
  }
  
  EXPORT_SYMBOL(unregister_binfmt);
  
  /*
   * Release a module reference on the module that provides @fmt.
   * In this file it pairs with the try_module_get(fmt->module) call made
   * before a handler's load_shlib() is invoked.
   */
  static inline void put_binfmt(struct linux_binfmt * fmt)
  {
  	module_put(fmt->module);
  }
  
  /*
   * Note that a shared library must be both readable and executable due to
   * security reasons.
   *
   * Also note that we take the address to load from the file itself.
   */
1e7bfb213   Heiko Carstens   [CVE-2009-0029] S...
101
  /*
   * uselib(2): open @library and hand it to the first registered binary
   * format whose load_shlib() does not answer -ENOEXEC.
   *
   * Returns the result of that load_shlib() call, or a negative errno:
   * the getname()/do_filp_open() error, -EINVAL for a non-regular file,
   * -EACCES for a file on a noexec mount, -ENOEXEC if no format took it.
   */
  SYSCALL_DEFINE1(uselib, const char __user *, library)
  {
  	struct linux_binfmt *fmt;
  	struct file *file;
  	char *name;
  	int error;

  	name = getname(library);
  	if (IS_ERR(name)) {
  		error = PTR_ERR(name);
  		goto out;
  	}

  	file = do_filp_open(AT_FDCWD, name,
  				O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
  				MAY_READ | MAY_EXEC | MAY_OPEN);
  	putname(name);
  	if (IS_ERR(file)) {
  		error = PTR_ERR(file);
  		goto out;
  	}

  	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
  		error = -EINVAL;
  		goto exit;
  	}
  	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
  		error = -EACCES;
  		goto exit;
  	}

  	fsnotify_open(file);

  	error = -ENOEXEC;
  	if (!file->f_op)
  		goto exit;

  	read_lock(&binfmt_lock);
  	list_for_each_entry(fmt, &formats, lh) {
  		if (!fmt->load_shlib || !try_module_get(fmt->module))
  			continue;

  		/* The handler is called without binfmt_lock held. */
  		read_unlock(&binfmt_lock);
  		error = fmt->load_shlib(file);
  		read_lock(&binfmt_lock);
  		put_binfmt(fmt);

  		if (error != -ENOEXEC)
  			break;
  	}
  	read_unlock(&binfmt_lock);

  exit:
  	fput(file);
  out:
  	return error;
  }
b6a2fea39   Ollie Wild   mm: variable leng...
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
  #ifdef CONFIG_MMU
  
  static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
  		int write)
  {
  	struct page *page;
  	int ret;
  
  #ifdef CONFIG_STACK_GROWSUP
  	if (write) {
  		ret = expand_stack_downwards(bprm->vma, pos);
  		if (ret < 0)
  			return NULL;
  	}
  #endif
  	ret = get_user_pages(current, bprm->mm, pos,
  			1, write, 1, &page, NULL);
  	if (ret <= 0)
  		return NULL;
  
  	if (write) {
b6a2fea39   Ollie Wild   mm: variable leng...
171
  		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
a64e715fc   Linus Torvalds   Allow ARG_MAX exe...
172
173
174
175
176
177
178
179
  		struct rlimit *rlim;
  
  		/*
  		 * We've historically supported up to 32 pages (ARG_MAX)
  		 * of argument strings even with small stacks
  		 */
  		if (size <= ARG_MAX)
  			return page;
b6a2fea39   Ollie Wild   mm: variable leng...
180
181
182
183
184
185
186
187
  
  		/*
  		 * Limit to 1/4-th the stack size for the argv+env strings.
  		 * This ensures that:
  		 *  - the remaining binfmt code will not run out of stack space,
  		 *  - the program will have a reasonable amount of stack left
  		 *    to work from.
  		 */
a64e715fc   Linus Torvalds   Allow ARG_MAX exe...
188
  		rlim = current->signal->rlim;
d554ed895   Jiri Slaby   fs: use rlimit he...
189
  		if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
b6a2fea39   Ollie Wild   mm: variable leng...
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
  			put_page(page);
  			return NULL;
  		}
  	}
  
  	return page;
  }
  
  /*
   * CONFIG_MMU variant: drop the page reference obtained through
   * get_arg_page().
   */
  static void put_arg_page(struct page *page)
  {
  	put_page(page);
  }
  
  /*
   * CONFIG_MMU variant: intentionally empty.  Only the !CONFIG_MMU
   * variant has a bprm->page[] entry to free.
   */
  static void free_arg_page(struct linux_binprm *bprm, int i)
  {
  }
  
  /*
   * CONFIG_MMU variant: intentionally empty.  Only the !CONFIG_MMU
   * variant walks bprm->page[] and frees each entry.
   */
  static void free_arg_pages(struct linux_binprm *bprm)
  {
  }
  
  /*
   * CONFIG_MMU variant: call flush_cache_page() for @page, which is
   * mapped at address @pos in bprm->vma.
   */
  static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
  		struct page *page)
  {
  	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
  }
  
  static int __bprm_mm_init(struct linux_binprm *bprm)
  {
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
219
  	int err;
b6a2fea39   Ollie Wild   mm: variable leng...
220
221
222
223
224
  	struct vm_area_struct *vma = NULL;
  	struct mm_struct *mm = bprm->mm;
  
  	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
  	if (!vma)
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
225
  		return -ENOMEM;
b6a2fea39   Ollie Wild   mm: variable leng...
226
227
228
229
230
231
232
233
234
235
  
  	down_write(&mm->mmap_sem);
  	vma->vm_mm = mm;
  
  	/*
  	 * Place the stack at the largest stack address the architecture
  	 * supports. Later, we'll move this to an appropriate place. We don't
  	 * use STACK_TOP because that can depend on attributes which aren't
  	 * configured yet.
  	 */
a8bef8ff6   Mel Gorman   mm: migration: av...
236
  	BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
b6a2fea39   Ollie Wild   mm: variable leng...
237
238
  	vma->vm_end = STACK_TOP_MAX;
  	vma->vm_start = vma->vm_end - PAGE_SIZE;
a8bef8ff6   Mel Gorman   mm: migration: av...
239
  	vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
3ed75eb8f   Coly Li   setup vma->vm_pag...
240
  	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
5beb49305   Rik van Riel   mm: change anon_v...
241
  	INIT_LIST_HEAD(&vma->anon_vma_chain);
b6a2fea39   Ollie Wild   mm: variable leng...
242
  	err = insert_vm_struct(mm, vma);
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
243
  	if (err)
b6a2fea39   Ollie Wild   mm: variable leng...
244
  		goto err;
b6a2fea39   Ollie Wild   mm: variable leng...
245
246
247
  
  	mm->stack_vm = mm->total_vm = 1;
  	up_write(&mm->mmap_sem);
b6a2fea39   Ollie Wild   mm: variable leng...
248
  	bprm->p = vma->vm_end - sizeof(void *);
b6a2fea39   Ollie Wild   mm: variable leng...
249
  	return 0;
b6a2fea39   Ollie Wild   mm: variable leng...
250
  err:
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
251
252
253
  	up_write(&mm->mmap_sem);
  	bprm->vma = NULL;
  	kmem_cache_free(vm_area_cachep, vma);
b6a2fea39   Ollie Wild   mm: variable leng...
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
  	return err;
  }
  
  /*
   * CONFIG_MMU variant: a single argument string is acceptable when its
   * length @len does not exceed MAX_ARG_STRLEN.  @bprm is not consulted.
   */
  static bool valid_arg_len(struct linux_binprm *bprm, long len)
  {
  	return len <= MAX_ARG_STRLEN;
  }
  
  #else
  
  /*
   * !CONFIG_MMU variant: return the bprm->page[] entry covering @pos.
   *
   * A missing entry is allocated (zeroed) only when @write is non-zero;
   * otherwise, or if the allocation fails, NULL is returned.
   */
  static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
  		int write)
  {
  	unsigned long slot = pos / PAGE_SIZE;
  	struct page *page = bprm->page[slot];

  	if (page || !write)
  		return page;

  	page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
  	if (page)
  		bprm->page[slot] = page;

  	return page;
  }
  
  /*
   * !CONFIG_MMU variant: intentionally empty.  get_arg_page() in this
   * configuration hands out the bprm->page[] entry itself, which is
   * released by free_arg_page().
   */
  static void put_arg_page(struct page *page)
  {
  }
  
  /*
   * !CONFIG_MMU variant: free the page stored in bprm->page[i], if any,
   * and clear the slot.
   */
  static void free_arg_page(struct linux_binprm *bprm, int i)
  {
  	if (!bprm->page[i])
  		return;

  	__free_page(bprm->page[i]);
  	bprm->page[i] = NULL;
  }
  
  /*
   * !CONFIG_MMU variant: release every slot of bprm->page[], from index 0
   * up to MAX_ARG_PAGES - 1.
   */
  static void free_arg_pages(struct linux_binprm *bprm)
  {
  	int slot = 0;

  	while (slot < MAX_ARG_PAGES) {
  		free_arg_page(bprm, slot);
  		slot++;
  	}
  }
  
  /*
   * !CONFIG_MMU variant: intentionally empty; no cache flush is issued
   * in this configuration.
   */
  static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
  		struct page *page)
  {
  }
  
  /*
   * !CONFIG_MMU variant: no vma is set up.  bprm->p starts one pointer
   * size below the end of a MAX_ARG_PAGES-page area.  Always returns 0.
   */
  static int __bprm_mm_init(struct linux_binprm *bprm)
  {
  	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
  	return 0;
  }
  
  /*
   * !CONFIG_MMU variant: a string of length @len is acceptable when it
   * does not exceed bprm->p.  copy_strings() lowers bprm->p by the length
   * of each string it copies.
   * NOTE(review): @len is signed while bprm->p looks unsigned, so the
   * comparison presumably happens in unsigned arithmetic; confirm against
   * the struct linux_binprm definition.
   */
  static bool valid_arg_len(struct linux_binprm *bprm, long len)
  {
  	return len <= bprm->p;
  }
  
  #endif /* CONFIG_MMU */
  
  /*
   * Allocate a fresh mm_struct for the program being exec'ed and give it a
   * temporary stack via __bprm_mm_init().  There is not enough context yet
   * to choose the final stack flags, permissions and offset, so temporary
   * values are used; setup_arg_pages() updates them later.
   *
   * Returns 0 on success.  On failure returns -ENOMEM (mm_alloc() failed) or
   * the error from init_new_context()/__bprm_mm_init(); bprm->mm is then NULL
   * and any allocated mm has been dropped.
   */
  int bprm_mm_init(struct linux_binprm *bprm)
  {
  	struct mm_struct *new_mm;
  	int ret;

  	new_mm = mm_alloc();
  	bprm->mm = new_mm;
  	if (!new_mm)
  		return -ENOMEM;

  	ret = init_new_context(current, new_mm);
  	if (!ret)
  		ret = __bprm_mm_init(bprm);
  	if (!ret)
  		return 0;

  	bprm->mm = NULL;
  	mmdrop(new_mm);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
354
  /*
   * count() counts the number of strings in array ARGV.
   */
d7627467b   David Howells   Make do_execve() ...
355
  /*
   * @argv: user-space array of string pointers terminated by a NULL entry;
   *        may itself be NULL, in which case the count is 0.
   * @max:  largest number of entries accepted.
   *
   * Returns the number of non-NULL entries, -EFAULT if an entry cannot be
   * read from user space, -E2BIG if there are more than @max entries, or
   * -ERESTARTNOHAND if a fatal signal is pending.
   */
  static int count(const char __user * const __user * argv, int max)
  {
  	int nr = 0;

  	if (argv == NULL)
  		return 0;

  	for (;;) {
  		const char __user *entry;

  		if (get_user(entry, argv))
  			return -EFAULT;
  		if (!entry)
  			break;
  		argv++;

  		if (nr >= max)
  			return -E2BIG;
  		nr++;

  		if (fatal_signal_pending(current))
  			return -ERESTARTNOHAND;
  		cond_resched();
  	}

  	return nr;
  }
  
  /*
b6a2fea39   Ollie Wild   mm: variable leng...
380
381
382
   * 'copy_strings()' copies argument/environment strings from the old
   * process's memory to the new process's stack.  The call to get_user_pages()
   * ensures the destination page is created and not swapped out.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
383
   */
d7627467b   David Howells   Make do_execve() ...
384
  /*
   * @argc: number of strings in @argv to copy.
   * @argv: user-space array of user-space string pointers.
   * @bprm: exec state; bprm->p is lowered by the length of every string.
   *
   * Strings are taken from the last array entry to the first, and each
   * string is copied from its end towards its start, one destination page
   * at a time.
   *
   * Returns 0 on success, -EFAULT on a user-space access failure or a zero
   * strnlen_user() result, -E2BIG if a string fails valid_arg_len() or no
   * destination page is available, -ERESTARTNOHAND on a pending fatal signal.
   */
  static int copy_strings(int argc, const char __user *const __user *argv,
  			struct linux_binprm *bprm)
  {
  	struct page *mapped_page = NULL;	/* page currently kmap()ed */
  	char *mapped_addr = NULL;		/* its kernel address */
  	unsigned long mapped_pos = 0;		/* page-aligned stack address */
  	int ret = 0;

  	while (argc-- > 0) {
  		const char __user *str;
  		unsigned long pos;
  		int len;

  		if (get_user(str, argv + argc)) {
  			ret = -EFAULT;
  			goto out;
  		}
  		len = strnlen_user(str, MAX_ARG_STRLEN);
  		if (!len) {
  			ret = -EFAULT;
  			goto out;
  		}
  		if (!valid_arg_len(bprm, len)) {
  			ret = -E2BIG;
  			goto out;
  		}

  		/* We're going to work our way backwards. */
  		pos = bprm->p;
  		str += len;
  		bprm->p -= len;

  		while (len > 0) {
  			int offset, chunk;

  			if (fatal_signal_pending(current)) {
  				ret = -ERESTARTNOHAND;
  				goto out;
  			}
  			cond_resched();

  			/* Bytes available below pos within its page. */
  			offset = pos % PAGE_SIZE;
  			if (offset == 0)
  				offset = PAGE_SIZE;

  			chunk = (offset > len) ? len : offset;

  			offset -= chunk;
  			pos -= chunk;
  			str -= chunk;
  			len -= chunk;

  			if (!mapped_page || mapped_pos != (pos & PAGE_MASK)) {
  				struct page *page = get_arg_page(bprm, pos, 1);

  				if (!page) {
  					ret = -E2BIG;
  					goto out;
  				}

  				/* Release the previous page before switching. */
  				if (mapped_page) {
  					flush_kernel_dcache_page(mapped_page);
  					kunmap(mapped_page);
  					put_arg_page(mapped_page);
  				}

  				mapped_page = page;
  				mapped_addr = kmap(mapped_page);
  				mapped_pos = pos & PAGE_MASK;
  				flush_arg_page(bprm, mapped_pos, mapped_page);
  			}

  			if (copy_from_user(mapped_addr + offset, str, chunk)) {
  				ret = -EFAULT;
  				goto out;
  			}
  		}
  	}

  out:
  	if (mapped_page) {
  		flush_kernel_dcache_page(mapped_page);
  		kunmap(mapped_page);
  		put_arg_page(mapped_page);
  	}
  	return ret;
  }
  
  /*
   * Like copy_strings, but get argv and its values from kernel memory.
   */
d7627467b   David Howells   Make do_execve() ...
470
471
  int copy_strings_kernel(int argc, const char *const *argv,
  			struct linux_binprm *bprm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
472
473
474
475
  {
  	int r;
  	mm_segment_t oldfs = get_fs();
  	set_fs(KERNEL_DS);
d7627467b   David Howells   Make do_execve() ...
476
  	r = copy_strings(argc, (const char __user *const  __user *)argv, bprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477
478
479
  	set_fs(oldfs);
  	return r;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
480
481
482
  EXPORT_SYMBOL(copy_strings_kernel);
  
  #ifdef CONFIG_MMU
b6a2fea39   Ollie Wild   mm: variable leng...
483

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
484
  /*
b6a2fea39   Ollie Wild   mm: variable leng...
485
486
487
   * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
   * the binfmt code determines where the new stack should reside, we shift it to
   * its final location.  The process proceeds as follows:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
488
   *
b6a2fea39   Ollie Wild   mm: variable leng...
489
490
491
492
493
494
   * 1) Use shift to calculate the new vma endpoints.
   * 2) Extend vma to cover both the old and new ranges.  This ensures the
   *    arguments passed to subsequent functions are consistent.
   * 3) Move vma's page tables to the new range.
   * 4) Free up any cleared pgd range.
   * 5) Shrink the vma to cover only the new range.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
495
   */
b6a2fea39   Ollie Wild   mm: variable leng...
496
  static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
497
498
  {
  	struct mm_struct *mm = vma->vm_mm;
b6a2fea39   Ollie Wild   mm: variable leng...
499
500
501
502
503
504
  	unsigned long old_start = vma->vm_start;
  	unsigned long old_end = vma->vm_end;
  	unsigned long length = old_end - old_start;
  	unsigned long new_start = old_start - shift;
  	unsigned long new_end = old_end - shift;
  	struct mmu_gather *tlb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
505

b6a2fea39   Ollie Wild   mm: variable leng...
506
  	BUG_ON(new_start > new_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507

b6a2fea39   Ollie Wild   mm: variable leng...
508
509
510
511
512
513
514
515
516
517
  	/*
  	 * ensure there are no vmas between where we want to go
  	 * and where we are
  	 */
  	if (vma != find_vma(mm, new_start))
  		return -EFAULT;
  
  	/*
  	 * cover the whole range: [new_start, old_end)
  	 */
5beb49305   Rik van Riel   mm: change anon_v...
518
519
  	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
  		return -ENOMEM;
b6a2fea39   Ollie Wild   mm: variable leng...
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
  
  	/*
  	 * move the page tables downwards, on failure we rely on
  	 * process cleanup to remove whatever mess we made.
  	 */
  	if (length != move_page_tables(vma, old_start,
  				       vma, new_start, length))
  		return -ENOMEM;
  
  	lru_add_drain();
  	tlb = tlb_gather_mmu(mm, 0);
  	if (new_end > old_start) {
  		/*
  		 * when the old and new regions overlap clear from new_end.
  		 */
42b777281   Jan Beulich   mm: remove double...
535
  		free_pgd_range(tlb, new_end, old_end, new_end,
b6a2fea39   Ollie Wild   mm: variable leng...
536
537
538
539
540
541
542
543
  			vma->vm_next ? vma->vm_next->vm_start : 0);
  	} else {
  		/*
  		 * otherwise, clean from old_start; this is done to not touch
  		 * the address space in [new_end, old_start) some architectures
  		 * have constraints on va-space that make this illegal (IA64) -
  		 * for the others its just a little faster.
  		 */
42b777281   Jan Beulich   mm: remove double...
544
  		free_pgd_range(tlb, old_start, old_end, new_end,
b6a2fea39   Ollie Wild   mm: variable leng...
545
  			vma->vm_next ? vma->vm_next->vm_start : 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
  	}
b6a2fea39   Ollie Wild   mm: variable leng...
547
548
549
  	tlb_finish_mmu(tlb, new_end, old_end);
  
  	/*
5beb49305   Rik van Riel   mm: change anon_v...
550
  	 * Shrink the vma to just the new range.  Always succeeds.
b6a2fea39   Ollie Wild   mm: variable leng...
551
552
553
554
  	 */
  	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
  
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
555
  }
b6a2fea39   Ollie Wild   mm: variable leng...
556
557
558
559
  /*
   * Finalizes the stack vm_area_struct. The flags and permissions are updated,
   * the stack is optionally relocated, and some extra space is added.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
560
561
562
563
  int setup_arg_pages(struct linux_binprm *bprm,
  		    unsigned long stack_top,
  		    int executable_stack)
  {
b6a2fea39   Ollie Wild   mm: variable leng...
564
565
  	unsigned long ret;
  	unsigned long stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
  	struct mm_struct *mm = current->mm;
b6a2fea39   Ollie Wild   mm: variable leng...
567
568
569
570
  	struct vm_area_struct *vma = bprm->vma;
  	struct vm_area_struct *prev = NULL;
  	unsigned long vm_flags;
  	unsigned long stack_base;
803bf5ec2   Michael Neuling   fs/exec.c: restri...
571
572
573
  	unsigned long stack_size;
  	unsigned long stack_expand;
  	unsigned long rlim_stack;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
574
575
  
  #ifdef CONFIG_STACK_GROWSUP
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
576
  	/* Limit stack size to 1GB */
d554ed895   Jiri Slaby   fs: use rlimit he...
577
  	stack_base = rlimit_max(RLIMIT_STACK);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
579
  	if (stack_base > (1 << 30))
  		stack_base = 1 << 30;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580

b6a2fea39   Ollie Wild   mm: variable leng...
581
582
583
  	/* Make sure we didn't let the argument array grow too large. */
  	if (vma->vm_end - vma->vm_start > stack_base)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
584

b6a2fea39   Ollie Wild   mm: variable leng...
585
  	stack_base = PAGE_ALIGN(stack_top - stack_base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
586

b6a2fea39   Ollie Wild   mm: variable leng...
587
588
589
  	stack_shift = vma->vm_start - stack_base;
  	mm->arg_start = bprm->p - stack_shift;
  	bprm->p = vma->vm_end - stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
  #else
b6a2fea39   Ollie Wild   mm: variable leng...
591
592
  	stack_top = arch_align_stack(stack_top);
  	stack_top = PAGE_ALIGN(stack_top);
1b528181b   Roland McGrath   setup_arg_pages: ...
593
594
595
596
  
  	if (unlikely(stack_top < mmap_min_addr) ||
  	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
  		return -ENOMEM;
b6a2fea39   Ollie Wild   mm: variable leng...
597
598
599
  	stack_shift = vma->vm_end - stack_top;
  
  	bprm->p -= stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
600
  	mm->arg_start = bprm->p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
601
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602
  	if (bprm->loader)
b6a2fea39   Ollie Wild   mm: variable leng...
603
604
  		bprm->loader -= stack_shift;
  	bprm->exec -= stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
605

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
606
  	down_write(&mm->mmap_sem);
96a8e13ed   Hugh Dickins   exec: fix stack e...
607
  	vm_flags = VM_STACK_FLAGS;
b6a2fea39   Ollie Wild   mm: variable leng...
608
609
610
611
612
613
614
615
616
617
618
  
  	/*
  	 * Adjust stack execute permissions; explicitly enable for
  	 * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
  	 * (arch default) otherwise.
  	 */
  	if (unlikely(executable_stack == EXSTACK_ENABLE_X))
  		vm_flags |= VM_EXEC;
  	else if (executable_stack == EXSTACK_DISABLE_X)
  		vm_flags &= ~VM_EXEC;
  	vm_flags |= mm->def_flags;
a8bef8ff6   Mel Gorman   mm: migration: av...
619
  	vm_flags |= VM_STACK_INCOMPLETE_SETUP;
b6a2fea39   Ollie Wild   mm: variable leng...
620
621
622
623
624
625
626
627
628
629
  
  	ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
  			vm_flags);
  	if (ret)
  		goto out_unlock;
  	BUG_ON(prev != vma);
  
  	/* Move stack pages down in memory. */
  	if (stack_shift) {
  		ret = shift_arg_pages(vma, stack_shift);
fc63cf237   Anton Blanchard   exec: setup_arg_p...
630
631
  		if (ret)
  			goto out_unlock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
632
  	}
a8bef8ff6   Mel Gorman   mm: migration: av...
633
634
  	/* mprotect_fixup is overkill to remove the temporary stack flags */
  	vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;
5ef097dd7   Michael Neuling   exec: create init...
635
  	stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
803bf5ec2   Michael Neuling   fs/exec.c: restri...
636
637
638
639
640
641
  	stack_size = vma->vm_end - vma->vm_start;
  	/*
  	 * Align this down to a page boundary as expand_stack
  	 * will align it up.
  	 */
  	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
b6a2fea39   Ollie Wild   mm: variable leng...
642
  #ifdef CONFIG_STACK_GROWSUP
803bf5ec2   Michael Neuling   fs/exec.c: restri...
643
644
645
646
  	if (stack_size + stack_expand > rlim_stack)
  		stack_base = vma->vm_start + rlim_stack;
  	else
  		stack_base = vma->vm_end + stack_expand;
b6a2fea39   Ollie Wild   mm: variable leng...
647
  #else
803bf5ec2   Michael Neuling   fs/exec.c: restri...
648
649
650
651
  	if (stack_size + stack_expand > rlim_stack)
  		stack_base = vma->vm_end - rlim_stack;
  	else
  		stack_base = vma->vm_start - stack_expand;
b6a2fea39   Ollie Wild   mm: variable leng...
652
  #endif
3af9e8592   Eric B Munson   perf: Add non-exe...
653
  	current->mm->start_stack = bprm->p;
b6a2fea39   Ollie Wild   mm: variable leng...
654
655
656
657
658
  	ret = expand_stack(vma, stack_base);
  	if (ret)
  		ret = -EFAULT;
  
  out_unlock:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
659
  	up_write(&mm->mmap_sem);
fc63cf237   Anton Blanchard   exec: setup_arg_p...
660
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
661
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662
  EXPORT_SYMBOL(setup_arg_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
663
664
665
666
  #endif /* CONFIG_MMU */
  
  struct file *open_exec(const char *name)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667
  	struct file *file;
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
668
  	int err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
669

6e8341a11   Al Viro   Switch open_exec(...
670
671
672
673
  	file = do_filp_open(AT_FDCWD, name,
  				O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
  				MAY_EXEC | MAY_OPEN);
  	if (IS_ERR(file))
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
674
675
676
  		goto out;
  
  	err = -EACCES;
6e8341a11   Al Viro   Switch open_exec(...
677
678
  	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
  		goto exit;
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
679

6e8341a11   Al Viro   Switch open_exec(...
680
681
  	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
  		goto exit;
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
682

2a12a9d78   Eric Paris   fsnotify: pass a ...
683
  	fsnotify_open(file);
6110e3abb   Eric Paris   sys_execve and sy...
684

e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
685
  	err = deny_write_access(file);
6e8341a11   Al Viro   Switch open_exec(...
686
687
  	if (err)
  		goto exit;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
688

6e8341a11   Al Viro   Switch open_exec(...
689
  out:
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
690
  	return file;
6e8341a11   Al Viro   Switch open_exec(...
691
692
  exit:
  	fput(file);
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
693
694
  	return ERR_PTR(err);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
695
  EXPORT_SYMBOL(open_exec);
6777d773a   Mimi Zohar   kernel_read: rede...
696
697
  /*
   * Read up to @count bytes from @file, starting at @offset, into the
   * kernel buffer @addr.  Returns the vfs_read() result as an int.
   */
  int kernel_read(struct file *file, loff_t offset,
  		char *addr, unsigned long count)
  {
  	mm_segment_t saved_fs;
  	loff_t pos = offset;
  	int nread;

  	saved_fs = get_fs();
  	set_fs(get_ds());
  	/* With set_fs() in effect, the kernel buffer may be passed as __user. */
  	nread = vfs_read(file, (void __user *)addr, count, &pos);
  	set_fs(saved_fs);

  	return nread;
  }
  
  EXPORT_SYMBOL(kernel_read);
  
  static int exec_mmap(struct mm_struct *mm)
  {
  	struct task_struct *tsk;
  	struct mm_struct * old_mm, *active_mm;
  
  	/* Notify parent that we're no longer interested in the old VM */
  	tsk = current;
  	old_mm = current->mm;
34e55232e   KAMEZAWA Hiroyuki   mm: avoid false s...
721
  	sync_mm_rss(tsk, old_mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
722
723
724
725
726
727
728
  	mm_release(tsk, old_mm);
  
  	if (old_mm) {
  		/*
  		 * Make sure that if there is a core dump in progress
  		 * for the old mm, we get out and die instead of going
  		 * through with the exec.  We must hold mmap_sem around
999d9fc16   Oleg Nesterov   coredump: move mm...
729
  		 * checking core_state and changing tsk->mm.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
730
731
  		 */
  		down_read(&old_mm->mmap_sem);
999d9fc16   Oleg Nesterov   coredump: move mm...
732
  		if (unlikely(old_mm->core_state)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
733
734
735
736
737
738
739
740
741
742
743
744
745
  			up_read(&old_mm->mmap_sem);
  			return -EINTR;
  		}
  	}
  	task_lock(tsk);
  	active_mm = tsk->active_mm;
  	tsk->mm = mm;
  	tsk->active_mm = mm;
  	activate_mm(active_mm, mm);
  	task_unlock(tsk);
  	arch_pick_mmap_layout(mm);
  	if (old_mm) {
  		up_read(&old_mm->mmap_sem);
7dddb12c6   Eric Sesterhenn   BUG_ON() Conversi...
746
  		BUG_ON(active_mm != old_mm);
31a78f23b   Balbir Singh   mm owner: fix rac...
747
  		mm_update_next_owner(old_mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
748
749
750
751
752
753
754
755
756
757
758
759
760
  		mmput(old_mm);
  		return 0;
  	}
  	mmdrop(active_mm);
  	return 0;
  }
  
  /*
   * This function makes sure the current process has its own signal table,
   * so that flush_signal_handlers can later reset the handlers without
   * disturbing other processes.  (Other processes might share the signal
   * table via the CLONE_SIGHAND option to clone().)
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
761
  static int de_thread(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
762
763
  {
  	struct signal_struct *sig = tsk->signal;
b2c903b87   Oleg Nesterov   exec: simplify th...
764
  	struct sighand_struct *oldsighand = tsk->sighand;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
765
  	spinlock_t *lock = &oldsighand->siglock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
766

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
767
  	if (thread_group_empty(tsk))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
768
769
770
771
  		goto no_thread_group;
  
  	/*
  	 * Kill all other threads in the thread group.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
772
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
773
  	spin_lock_irq(lock);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
774
  	if (signal_group_exit(sig)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
775
776
777
778
779
  		/*
  		 * Another group action in progress, just
  		 * return so that the signal is processed.
  		 */
  		spin_unlock_irq(lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
781
  		return -EAGAIN;
  	}
d344193a0   Oleg Nesterov   exit: avoid sig->...
782

ed5d2cac1   Oleg Nesterov   exec: rework the ...
783
  	sig->group_exit_task = tsk;
d344193a0   Oleg Nesterov   exit: avoid sig->...
784
785
786
  	sig->notify_count = zap_other_threads(tsk);
  	if (!thread_group_leader(tsk))
  		sig->notify_count--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
787

d344193a0   Oleg Nesterov   exit: avoid sig->...
788
  	while (sig->notify_count) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
789
790
791
792
793
  		__set_current_state(TASK_UNINTERRUPTIBLE);
  		spin_unlock_irq(lock);
  		schedule();
  		spin_lock_irq(lock);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
794
795
796
797
798
799
800
  	spin_unlock_irq(lock);
  
  	/*
  	 * At this point all other threads have exited, all we have to
  	 * do is to wait for the thread group leader to become inactive,
  	 * and to assume its PID:
  	 */
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
801
  	if (!thread_group_leader(tsk)) {
8187926bd   Oleg Nesterov   posix-timers: sim...
802
  		struct task_struct *leader = tsk->group_leader;
6db840fa7   Oleg Nesterov   exec: RT sub-thre...
803

2800d8d19   Oleg Nesterov   document de_threa...
804
  		sig->notify_count = -1;	/* for exit_notify() */
6db840fa7   Oleg Nesterov   exec: RT sub-thre...
805
806
807
808
809
810
811
812
  		for (;;) {
  			write_lock_irq(&tasklist_lock);
  			if (likely(leader->exit_state))
  				break;
  			__set_current_state(TASK_UNINTERRUPTIBLE);
  			write_unlock_irq(&tasklist_lock);
  			schedule();
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
813

f5e902817   Roland McGrath   [PATCH] process a...
814
815
816
817
818
819
820
821
822
823
  		/*
  		 * The only record we have of the real-time age of a
  		 * process, regardless of execs it's done, is start_time.
  		 * All the past CPU time is accumulated in signal_struct
  		 * from sister threads now dead.  But in this non-leader
  		 * exec, nothing survives from the original leader thread,
  		 * whose birth marks the true age of this process now.
  		 * When we take on its identity by switching to its PID, we
  		 * also take its birthdate (always earlier than our own).
  		 */
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
824
  		tsk->start_time = leader->start_time;
f5e902817   Roland McGrath   [PATCH] process a...
825

bac0abd61   Pavel Emelyanov   Isolate some expl...
826
827
  		BUG_ON(!same_thread_group(leader, tsk));
  		BUG_ON(has_group_leader_pid(tsk));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
829
830
831
832
833
  		/*
  		 * An exec() starts a new thread group with the
  		 * TGID of the previous thread group. Rehash the
  		 * two threads with a switched PID, and release
  		 * the former thread group leader:
  		 */
d73d65293   Eric W. Biederman   [PATCH] pidhash: ...
834
835
  
  		/* Become a process group leader with the old leader's pid.
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
836
837
  		 * The old leader becomes a thread of the this thread group.
  		 * Note: The old leader also uses this pid until release_task
d73d65293   Eric W. Biederman   [PATCH] pidhash: ...
838
839
  		 *       is called.  Odd but simple and correct.
  		 */
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
840
841
  		detach_pid(tsk, PIDTYPE_PID);
  		tsk->pid = leader->pid;
3743ca05f   Sukadev Bhattiprolu   pid namespaces: u...
842
  		attach_pid(tsk, PIDTYPE_PID,  task_pid(leader));
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
843
844
  		transfer_pid(leader, tsk, PIDTYPE_PGID);
  		transfer_pid(leader, tsk, PIDTYPE_SID);
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
845

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
846
  		list_replace_rcu(&leader->tasks, &tsk->tasks);
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
847
  		list_replace_init(&leader->sibling, &tsk->sibling);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
848

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
849
850
  		tsk->group_leader = tsk;
  		leader->group_leader = tsk;
de12a7878   Eric W. Biederman   [PATCH] de_thread...
851

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
852
  		tsk->exit_signal = SIGCHLD;
962b564cf   Oleg Nesterov   [PATCH] fix do_wa...
853
854
855
  
  		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
  		leader->exit_state = EXIT_DEAD;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
856
  		write_unlock_irq(&tasklist_lock);
8187926bd   Oleg Nesterov   posix-timers: sim...
857
858
  
  		release_task(leader);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
859
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
860

6db840fa7   Oleg Nesterov   exec: RT sub-thre...
861
862
  	sig->group_exit_task = NULL;
  	sig->notify_count = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
863
864
  
  no_thread_group:
1f10206cf   Jiri Pirko   getrusage: fill r...
865
866
  	if (current->mm)
  		setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
867
  	exit_itimers(sig);
cbaffba12   Oleg Nesterov   posix timers: dis...
868
  	flush_itimer_signals();
329f7dba5   Oleg Nesterov   [PATCH] fix de_th...
869

b2c903b87   Oleg Nesterov   exec: simplify th...
870
871
  	if (atomic_read(&oldsighand->count) != 1) {
  		struct sighand_struct *newsighand;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
872
  		/*
b2c903b87   Oleg Nesterov   exec: simplify th...
873
874
  		 * This ->sighand is shared with the CLONE_SIGHAND
  		 * but not CLONE_THREAD task, switch to the new one.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
875
  		 */
b2c903b87   Oleg Nesterov   exec: simplify th...
876
877
878
  		newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
  		if (!newsighand)
  			return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
879
880
881
882
883
884
  		atomic_set(&newsighand->count, 1);
  		memcpy(newsighand->action, oldsighand->action,
  		       sizeof(newsighand->action));
  
  		write_lock_irq(&tasklist_lock);
  		spin_lock(&oldsighand->siglock);
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
885
  		rcu_assign_pointer(tsk->sighand, newsighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
886
887
  		spin_unlock(&oldsighand->siglock);
  		write_unlock_irq(&tasklist_lock);
fba2afaae   Davide Libenzi   signal/timer/even...
888
  		__cleanup_sighand(oldsighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
889
  	}
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
890
  	BUG_ON(!thread_group_leader(tsk));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
891
892
  	return 0;
  }
0840a90d9   Oleg Nesterov   exec: simplify ->...
893

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
894
895
896
897
  /*
   * These functions flushes out all traces of the currently running executable
   * so that a new one can be started
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
898
  static void flush_old_files(struct files_struct * files)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
899
900
  {
  	long j = -1;
badf16621   Dipankar Sarma   [PATCH] files: br...
901
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
902
903
904
905
906
907
908
  
  	spin_lock(&files->file_lock);
  	for (;;) {
  		unsigned long set, i;
  
  		j++;
  		i = j * __NFDBITS;
badf16621   Dipankar Sarma   [PATCH] files: br...
909
  		fdt = files_fdtable(files);
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
910
  		if (i >= fdt->max_fds)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
911
  			break;
badf16621   Dipankar Sarma   [PATCH] files: br...
912
  		set = fdt->close_on_exec->fds_bits[j];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
913
914
  		if (!set)
  			continue;
badf16621   Dipankar Sarma   [PATCH] files: br...
915
  		fdt->close_on_exec->fds_bits[j] = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
916
917
918
919
920
921
922
923
924
925
926
  		spin_unlock(&files->file_lock);
  		for ( ; set ; i++,set >>= 1) {
  			if (set & 1) {
  				sys_close(i);
  			}
  		}
  		spin_lock(&files->file_lock);
  
  	}
  	spin_unlock(&files->file_lock);
  }
59714d65d   Andrew Morton   get_task_comm(): ...
927
  char *get_task_comm(char *buf, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
929
930
931
932
  {
  	/* buf must be at least sizeof(tsk->comm) in size */
  	task_lock(tsk);
  	strncpy(buf, tsk->comm, sizeof(tsk->comm));
  	task_unlock(tsk);
59714d65d   Andrew Morton   get_task_comm(): ...
933
  	return buf;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
934
935
936
937
938
  }
  
  void set_task_comm(struct task_struct *tsk, char *buf)
  {
  	task_lock(tsk);
4614a696b   John Stultz   procfs: allow thr...
939
940
941
942
943
944
945
946
947
  
  	/*
  	 * Threads may access current->comm without holding
  	 * the task lock, so write the string carefully.
  	 * Readers without a lock may see incomplete new
  	 * names but are safe from non-terminating string reads.
  	 */
  	memset(tsk->comm, 0, TASK_COMM_LEN);
  	wmb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
948
949
  	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
  	task_unlock(tsk);
cdd6c482c   Ingo Molnar   perf: Do the big ...
950
  	perf_event_comm(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
952
953
954
  }
  
  int flush_old_exec(struct linux_binprm * bprm)
  {
221af7f87   Linus Torvalds   Split 'flush_old_...
955
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956
957
958
959
960
961
962
963
  
  	/*
  	 * Make sure we have a private signal table and that
  	 * we are unassociated from the previous thread group.
  	 */
  	retval = de_thread(current);
  	if (retval)
  		goto out;
925d1c401   Matt Helsley   procfs task exe s...
964
  	set_mm_exe_file(bprm->mm, bprm->file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
965
  	/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
966
967
968
969
  	 * Release all of the old mmap stuff
  	 */
  	retval = exec_mmap(bprm->mm);
  	if (retval)
fd8328be8   Al Viro   [PATCH] sanitize ...
970
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
971
972
  
  	bprm->mm = NULL;		/* We're using it now */
7ab02af42   Linus Torvalds   Fix 'flush_old_ex...
973
974
975
976
  
  	current->flags &= ~PF_RANDOMIZE;
  	flush_thread();
  	current->personality &= ~bprm->per_clear;
221af7f87   Linus Torvalds   Split 'flush_old_...
977
978
979
980
981
982
983
984
985
986
  	return 0;
  
  out:
  	return retval;
  }
  EXPORT_SYMBOL(flush_old_exec);
  
  void setup_new_exec(struct linux_binprm * bprm)
  {
  	int i, ch;
d7627467b   David Howells   Make do_execve() ...
987
  	const char *name;
221af7f87   Linus Torvalds   Split 'flush_old_...
988
989
990
  	char tcomm[sizeof(current->comm)];
  
  	arch_pick_mmap_layout(current->mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
991
992
  
  	/* This is the point of no return */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
993
  	current->sas_ss_sp = current->sas_ss_size = 0;
da9592ede   David Howells   CRED: Wrap task c...
994
  	if (current_euid() == current_uid() && current_egid() == current_gid())
6c5d52382   Kawai, Hidehiro   coredump masking:...
995
  		set_dumpable(current->mm, 1);
d6e711448   Alan Cox   [PATCH] setuid co...
996
  	else
6c5d52382   Kawai, Hidehiro   coredump masking:...
997
  		set_dumpable(current->mm, suid_dumpable);
d6e711448   Alan Cox   [PATCH] setuid co...
998

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
999
  	name = bprm->filename;
367720923   Paolo 'Blaisorblade' Giarrusso   [PATCH] comments ...
1000
1001
  
  	/* Copies the binary name from after last slash */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
1003
  	for (i=0; (ch = *(name++)) != '\0';) {
  		if (ch == '/')
367720923   Paolo 'Blaisorblade' Giarrusso   [PATCH] comments ...
1004
  			i = 0; /* overwrite what we wrote */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1005
1006
1007
1008
1009
1010
  		else
  			if (i < (sizeof(tcomm) - 1))
  				tcomm[i++] = ch;
  	}
  	tcomm[i] = '\0';
  	set_task_comm(current, tcomm);
0551fbd29   Benjamin Herrenschmidt   [PATCH] Add mm->t...
1011
1012
1013
1014
1015
  	/* Set the new mm task size. We have to do that late because it may
  	 * depend on TIF_32BIT which is only updated in flush_thread() on
  	 * some architectures like powerpc
  	 */
  	current->mm->task_size = TASK_SIZE;
a6f76f23d   David Howells   CRED: Make execve...
1016
1017
1018
  	/* install the new credentials */
  	if (bprm->cred->uid != current_euid() ||
  	    bprm->cred->gid != current_egid()) {
d2d56c5f5   Marcel Holtmann   Reset current->pd...
1019
1020
  		current->pdeath_signal = 0;
  	} else if (file_permission(bprm->file, MAY_READ) ||
a6f76f23d   David Howells   CRED: Make execve...
1021
  		   bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
6c5d52382   Kawai, Hidehiro   coredump masking:...
1022
  		set_dumpable(current->mm, suid_dumpable);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1023
  	}
f65cb45cb   Ingo Molnar   perfcounters: flu...
1024
1025
1026
1027
1028
  	/*
  	 * Flush performance counters when crossing a
  	 * security domain:
  	 */
  	if (!get_dumpable(current->mm))
cdd6c482c   Ingo Molnar   perf: Do the big ...
1029
  		perf_event_exit_task(current);
f65cb45cb   Ingo Molnar   perfcounters: flu...
1030

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1031
1032
1033
1034
1035
1036
1037
  	/* An exec changes our domain. We are no longer part of the thread
  	   group */
  
  	current->self_exec_id++;
  			
  	flush_signal_handlers(current, 0);
  	flush_old_files(current->files);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1038
  }
221af7f87   Linus Torvalds   Split 'flush_old_...
1039
  EXPORT_SYMBOL(setup_new_exec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1040

a6f76f23d   David Howells   CRED: Make execve...
1041
  /*
   * Prepare credentials and lock ->cred_guard_mutex.
   * install_exec_creds() commits the new creds and drops the lock.
   * Or, if exec fails before that, free_bprm() should release ->cred
   * and unlock.
   *
   * Returns 0 with the mutex held, -ERESTARTNOINTR if taking the mutex
   * was interrupted, or -ENOMEM (mutex released) if no creds were obtained.
   */
  int prepare_bprm_creds(struct linux_binprm *bprm)
  {
  	if (mutex_lock_interruptible(&current->cred_guard_mutex))
  		return -ERESTARTNOINTR;

  	bprm->cred = prepare_exec_creds();
  	if (unlikely(!bprm->cred)) {
  		mutex_unlock(&current->cred_guard_mutex);
  		return -ENOMEM;
  	}

  	return 0;
  }
  
  /*
   * Tear down @bprm: free its argument pages, and, if credentials are
   * still attached, drop ->cred_guard_mutex and abort them, then free
   * the structure itself.
   */
  void free_bprm(struct linux_binprm *bprm)
  {
  	free_arg_pages(bprm);

  	/* A non-NULL ->cred means the creds were never installed. */
  	if (bprm->cred) {
  		mutex_unlock(&current->cred_guard_mutex);
  		abort_creds(bprm->cred);
  	}

  	kfree(bprm);
  }
  
  /*
a6f76f23d   David Howells   CRED: Make execve...
1071
1072
1073
1074
1075
1076
1077
1078
   * install the new credentials for this executable
   */
  void install_exec_creds(struct linux_binprm *bprm)
  {
  	security_bprm_committing_creds(bprm);
  
  	commit_creds(bprm->cred);
  	bprm->cred = NULL;
a2a8474c3   Oleg Nesterov   exec: do not slee...
1079
1080
  	/*
  	 * cred_guard_mutex must be held at least to this point to prevent
a6f76f23d   David Howells   CRED: Make execve...
1081
  	 * ptrace_attach() from altering our determination of the task's
a2a8474c3   Oleg Nesterov   exec: do not slee...
1082
1083
  	 * credentials; any time after this it may be unlocked.
  	 */
a6f76f23d   David Howells   CRED: Make execve...
1084
  	security_bprm_committed_creds(bprm);
a2a8474c3   Oleg Nesterov   exec: do not slee...
1085
  	mutex_unlock(&current->cred_guard_mutex);
a6f76f23d   David Howells   CRED: Make execve...
1086
1087
1088
1089
1090
  }
  EXPORT_SYMBOL(install_exec_creds);
  
  /*
   * determine how safe it is to execute the proposed program
5e751e992   David Howells   CRED: Rename cred...
1091
   * - the caller must hold current->cred_guard_mutex to protect against
a6f76f23d   David Howells   CRED: Make execve...
1092
1093
   *   PTRACE_ATTACH
   */
498052bba   Al Viro   New locking/refco...
1094
  int check_unsafe_exec(struct linux_binprm *bprm)
a6f76f23d   David Howells   CRED: Make execve...
1095
  {
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1096
  	struct task_struct *p = current, *t;
f1191b50e   Al Viro   check_unsafe_exec...
1097
  	unsigned n_fs;
498052bba   Al Viro   New locking/refco...
1098
  	int res = 0;
a6f76f23d   David Howells   CRED: Make execve...
1099
1100
  
  	bprm->unsafe = tracehook_unsafe_exec(p);
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1101
  	n_fs = 1;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1102
  	spin_lock(&p->fs->lock);
437f7fdb6   Oleg Nesterov   check_unsafe_exec...
1103
  	rcu_read_lock();
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1104
1105
1106
  	for (t = next_thread(p); t != p; t = next_thread(t)) {
  		if (t->fs == p->fs)
  			n_fs++;
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1107
  	}
437f7fdb6   Oleg Nesterov   check_unsafe_exec...
1108
  	rcu_read_unlock();
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1109

f1191b50e   Al Viro   check_unsafe_exec...
1110
  	if (p->fs->users > n_fs) {
a6f76f23d   David Howells   CRED: Make execve...
1111
  		bprm->unsafe |= LSM_UNSAFE_SHARE;
498052bba   Al Viro   New locking/refco...
1112
  	} else {
8c652f96d   Oleg Nesterov   do_execve() must ...
1113
1114
1115
1116
1117
  		res = -EAGAIN;
  		if (!p->fs->in_exec) {
  			p->fs->in_exec = 1;
  			res = 1;
  		}
498052bba   Al Viro   New locking/refco...
1118
  	}
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1119
  	spin_unlock(&p->fs->lock);
498052bba   Al Viro   New locking/refco...
1120
1121
  
  	return res;
a6f76f23d   David Howells   CRED: Make execve...
1122
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1123
1124
1125
  /* 
   * Fill the binprm structure from the inode. 
   * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
a6f76f23d   David Howells   CRED: Make execve...
1126
1127
   *
   * This may be called multiple times for binary chains (scripts for example).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1128
1129
1130
   */
  int prepare_binprm(struct linux_binprm *bprm)
  {
a6f76f23d   David Howells   CRED: Make execve...
1131
  	umode_t mode;
0f7fc9e4d   Josef "Jeff" Sipek   [PATCH] VFS: chan...
1132
  	struct inode * inode = bprm->file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1133
1134
1135
  	int retval;
  
  	mode = inode->i_mode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1136
1137
  	if (bprm->file->f_op == NULL)
  		return -EACCES;
a6f76f23d   David Howells   CRED: Make execve...
1138
1139
1140
  	/* clear any previous set[ug]id data from a previous binary */
  	bprm->cred->euid = current_euid();
  	bprm->cred->egid = current_egid();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1141

a6f76f23d   David Howells   CRED: Make execve...
1142
  	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1143
1144
  		/* Set-uid? */
  		if (mode & S_ISUID) {
a6f76f23d   David Howells   CRED: Make execve...
1145
1146
  			bprm->per_clear |= PER_CLEAR_ON_SETID;
  			bprm->cred->euid = inode->i_uid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1147
1148
1149
1150
1151
1152
1153
1154
1155
  		}
  
  		/* Set-gid? */
  		/*
  		 * If setgid is set but no group execute bit then this
  		 * is a candidate for mandatory locking, not a setgid
  		 * executable.
  		 */
  		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
a6f76f23d   David Howells   CRED: Make execve...
1156
1157
  			bprm->per_clear |= PER_CLEAR_ON_SETID;
  			bprm->cred->egid = inode->i_gid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1158
1159
1160
1161
  		}
  	}
  
  	/* fill in binprm security blob */
a6f76f23d   David Howells   CRED: Make execve...
1162
  	retval = security_bprm_set_creds(bprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1163
1164
  	if (retval)
  		return retval;
a6f76f23d   David Howells   CRED: Make execve...
1165
  	bprm->cred_prepared = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1166

a6f76f23d   David Howells   CRED: Make execve...
1167
1168
  	memset(bprm->buf, 0, BINPRM_BUF_SIZE);
  	return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1169
1170
1171
  }
  
  EXPORT_SYMBOL(prepare_binprm);
4fc75ff48   Nick Piggin   exec: fix remove_...
1172
1173
1174
1175
1176
  /*
   * Arguments are '\0' separated strings found at the location bprm->p
   * points to; chop off the first by relocating brpm->p to right after
   * the first '\0' encountered.
   */
b6a2fea39   Ollie Wild   mm: variable leng...
1177
  int remove_arg_zero(struct linux_binprm *bprm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1178
  {
b6a2fea39   Ollie Wild   mm: variable leng...
1179
1180
1181
1182
  	int ret = 0;
  	unsigned long offset;
  	char *kaddr;
  	struct page *page;
4fc75ff48   Nick Piggin   exec: fix remove_...
1183

b6a2fea39   Ollie Wild   mm: variable leng...
1184
1185
  	if (!bprm->argc)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1186

b6a2fea39   Ollie Wild   mm: variable leng...
1187
1188
1189
1190
1191
1192
1193
1194
  	do {
  		offset = bprm->p & ~PAGE_MASK;
  		page = get_arg_page(bprm, bprm->p, 0);
  		if (!page) {
  			ret = -EFAULT;
  			goto out;
  		}
  		kaddr = kmap_atomic(page, KM_USER0);
4fc75ff48   Nick Piggin   exec: fix remove_...
1195

b6a2fea39   Ollie Wild   mm: variable leng...
1196
1197
1198
  		for (; offset < PAGE_SIZE && kaddr[offset];
  				offset++, bprm->p++)
  			;
4fc75ff48   Nick Piggin   exec: fix remove_...
1199

b6a2fea39   Ollie Wild   mm: variable leng...
1200
1201
  		kunmap_atomic(kaddr, KM_USER0);
  		put_arg_page(page);
4fc75ff48   Nick Piggin   exec: fix remove_...
1202

b6a2fea39   Ollie Wild   mm: variable leng...
1203
1204
1205
  		if (offset == PAGE_SIZE)
  			free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
  	} while (offset == PAGE_SIZE);
4fc75ff48   Nick Piggin   exec: fix remove_...
1206

b6a2fea39   Ollie Wild   mm: variable leng...
1207
1208
1209
  	bprm->p++;
  	bprm->argc--;
  	ret = 0;
4fc75ff48   Nick Piggin   exec: fix remove_...
1210

b6a2fea39   Ollie Wild   mm: variable leng...
1211
1212
  out:
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1213
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214
1215
1216
1217
1218
1219
1220
  EXPORT_SYMBOL(remove_arg_zero);
  
  /*
   * cycle the list of binary formats handler, until one recognizes the image
   */
  int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
  {
85f334666   Roland McGrath   tracehook: exec d...
1221
  	unsigned int depth = bprm->recursion_depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1222
1223
  	int try,retval;
  	struct linux_binfmt *fmt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1224

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1225
1226
1227
1228
1229
1230
1231
  	retval = security_bprm_check(bprm);
  	if (retval)
  		return retval;
  
  	/* kernel module loader fixup */
  	/* so we don't try to load run modprobe in kernel space. */
  	set_fs(USER_DS);
473ae30bc   Al Viro   [PATCH] execve ar...
1232
1233
1234
1235
  
  	retval = audit_bprm(bprm);
  	if (retval)
  		return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1236
1237
1238
  	retval = -ENOENT;
  	for (try=0; try<2; try++) {
  		read_lock(&binfmt_lock);
e4dc1b14d   Alexey Dobriyan   Use list_head in ...
1239
  		list_for_each_entry(fmt, &formats, lh) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1240
1241
1242
1243
1244
1245
1246
  			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
  			if (!fn)
  				continue;
  			if (!try_module_get(fmt->module))
  				continue;
  			read_unlock(&binfmt_lock);
  			retval = fn(bprm, regs);
85f334666   Roland McGrath   tracehook: exec d...
1247
1248
1249
1250
1251
1252
  			/*
  			 * Restore the depth counter to its starting value
  			 * in this call, so we don't have to rely on every
  			 * load_binary function to restore it on return.
  			 */
  			bprm->recursion_depth = depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1253
  			if (retval >= 0) {
85f334666   Roland McGrath   tracehook: exec d...
1254
1255
  				if (depth == 0)
  					tracehook_report_exec(fmt, bprm, regs);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1256
1257
1258
1259
1260
1261
  				put_binfmt(fmt);
  				allow_write_access(bprm->file);
  				if (bprm->file)
  					fput(bprm->file);
  				bprm->file = NULL;
  				current->did_exec = 1;
9f46080c4   Matt Helsley   [PATCH] Process E...
1262
  				proc_exec_connector(current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
  				return retval;
  			}
  			read_lock(&binfmt_lock);
  			put_binfmt(fmt);
  			if (retval != -ENOEXEC || bprm->mm == NULL)
  				break;
  			if (!bprm->file) {
  				read_unlock(&binfmt_lock);
  				return retval;
  			}
  		}
  		read_unlock(&binfmt_lock);
  		if (retval != -ENOEXEC || bprm->mm == NULL) {
  			break;
5f4123be3   Johannes Berg   remove CONFIG_KMO...
1277
1278
  #ifdef CONFIG_MODULES
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
  #define printable(c) (((c)=='\t') || ((c)=='
  ') || (0x20<=(c) && (c)<=0x7e))
  			if (printable(bprm->buf[0]) &&
  			    printable(bprm->buf[1]) &&
  			    printable(bprm->buf[2]) &&
  			    printable(bprm->buf[3]))
  				break; /* -ENOEXEC */
  			request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
  #endif
  		}
  	}
  	return retval;
  }
  
  EXPORT_SYMBOL(search_binary_handler);
  
  /*
   * sys_execve() executes a new program.
   */
d7627467b   David Howells   Make do_execve() ...
1298
1299
1300
  int do_execve(const char * filename,
  	const char __user *const __user *argv,
  	const char __user *const __user *envp,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1301
1302
1303
1304
  	struct pt_regs * regs)
  {
  	struct linux_binprm *bprm;
  	struct file *file;
3b1253880   Al Viro   [PATCH] sanitize ...
1305
  	struct files_struct *displaced;
8c652f96d   Oleg Nesterov   do_execve() must ...
1306
  	bool clear_in_exec;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1307
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1308

3b1253880   Al Viro   [PATCH] sanitize ...
1309
  	retval = unshare_files(&displaced);
fd8328be8   Al Viro   [PATCH] sanitize ...
1310
1311
  	if (retval)
  		goto out_ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1312
  	retval = -ENOMEM;
11b0b5abb   Oliver Neukum   [PATCH] use kzall...
1313
  	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1314
  	if (!bprm)
fd8328be8   Al Viro   [PATCH] sanitize ...
1315
  		goto out_files;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1316

a2a8474c3   Oleg Nesterov   exec: do not slee...
1317
1318
  	retval = prepare_bprm_creds(bprm);
  	if (retval)
a6f76f23d   David Howells   CRED: Make execve...
1319
  		goto out_free;
498052bba   Al Viro   New locking/refco...
1320
1321
  
  	retval = check_unsafe_exec(bprm);
8c652f96d   Oleg Nesterov   do_execve() must ...
1322
  	if (retval < 0)
a2a8474c3   Oleg Nesterov   exec: do not slee...
1323
  		goto out_free;
8c652f96d   Oleg Nesterov   do_execve() must ...
1324
  	clear_in_exec = retval;
a2a8474c3   Oleg Nesterov   exec: do not slee...
1325
  	current->in_execve = 1;
a6f76f23d   David Howells   CRED: Make execve...
1326

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1327
1328
1329
  	file = open_exec(filename);
  	retval = PTR_ERR(file);
  	if (IS_ERR(file))
498052bba   Al Viro   New locking/refco...
1330
  		goto out_unmark;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1331
1332
  
  	sched_exec();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1333
1334
1335
  	bprm->file = file;
  	bprm->filename = filename;
  	bprm->interp = filename;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1336

b6a2fea39   Ollie Wild   mm: variable leng...
1337
1338
1339
  	retval = bprm_mm_init(bprm);
  	if (retval)
  		goto out_file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1340

b6a2fea39   Ollie Wild   mm: variable leng...
1341
  	bprm->argc = count(argv, MAX_ARG_STRINGS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1342
  	if ((retval = bprm->argc) < 0)
a6f76f23d   David Howells   CRED: Make execve...
1343
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1344

b6a2fea39   Ollie Wild   mm: variable leng...
1345
  	bprm->envc = count(envp, MAX_ARG_STRINGS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1346
  	if ((retval = bprm->envc) < 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
  		goto out;
  
  	retval = prepare_binprm(bprm);
  	if (retval < 0)
  		goto out;
  
  	retval = copy_strings_kernel(1, &bprm->filename, bprm);
  	if (retval < 0)
  		goto out;
  
  	bprm->exec = bprm->p;
  	retval = copy_strings(bprm->envc, envp, bprm);
  	if (retval < 0)
  		goto out;
  
  	retval = copy_strings(bprm->argc, argv, bprm);
  	if (retval < 0)
  		goto out;
7b34e4283   Oleg Nesterov   introduce PF_KTHR...
1365
  	current->flags &= ~PF_KTHREAD;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1366
  	retval = search_binary_handler(bprm,regs);
a6f76f23d   David Howells   CRED: Make execve...
1367
1368
  	if (retval < 0)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1369

a6f76f23d   David Howells   CRED: Make execve...
1370
  	/* execve succeeded */
498052bba   Al Viro   New locking/refco...
1371
  	current->fs->in_exec = 0;
f9ce1f1cd   Kentaro Takeda   Add in_execve fla...
1372
  	current->in_execve = 0;
a6f76f23d   David Howells   CRED: Make execve...
1373
1374
1375
1376
1377
  	acct_update_integrals(current);
  	free_bprm(bprm);
  	if (displaced)
  		put_files_struct(displaced);
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1378

a6f76f23d   David Howells   CRED: Make execve...
1379
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1380
  	if (bprm->mm)
b6a2fea39   Ollie Wild   mm: variable leng...
1381
  		mmput (bprm->mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1382
1383
1384
1385
1386
1387
  
  out_file:
  	if (bprm->file) {
  		allow_write_access(bprm->file);
  		fput(bprm->file);
  	}
a6f76f23d   David Howells   CRED: Make execve...
1388

498052bba   Al Viro   New locking/refco...
1389
  out_unmark:
8c652f96d   Oleg Nesterov   do_execve() must ...
1390
1391
  	if (clear_in_exec)
  		current->fs->in_exec = 0;
f9ce1f1cd   Kentaro Takeda   Add in_execve fla...
1392
  	current->in_execve = 0;
a6f76f23d   David Howells   CRED: Make execve...
1393
1394
  
  out_free:
08a6fac1c   Al Viro   [PATCH] get rid o...
1395
  	free_bprm(bprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1396

fd8328be8   Al Viro   [PATCH] sanitize ...
1397
  out_files:
3b1253880   Al Viro   [PATCH] sanitize ...
1398
1399
  	if (displaced)
  		reset_files_struct(displaced);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400
1401
1402
  out_ret:
  	return retval;
  }
964ee7df9   Oleg Nesterov   exec: fix set_bin...
1403
  void set_binfmt(struct linux_binfmt *new)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1404
  {
801460d0c   Hiroshi Shimamoto   task_struct clean...
1405
1406
1407
1408
  	struct mm_struct *mm = current->mm;
  
  	if (mm->binfmt)
  		module_put(mm->binfmt->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1409

801460d0c   Hiroshi Shimamoto   task_struct clean...
1410
  	mm->binfmt = new;
964ee7df9   Oleg Nesterov   exec: fix set_bin...
1411
1412
  	if (new)
  		__module_get(new->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1413
1414
1415
  }
  
  EXPORT_SYMBOL(set_binfmt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1416
1417
1418
1419
  /* format_corename will inspect the pattern parameter, and output a
   * name into corename, which must have space for at least
   * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
   */
6409324b3   Oleg Nesterov   coredump: format_...
1420
  static int format_corename(char *corename, long signr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1421
  {
86a264abe   David Howells   CRED: Wrap curren...
1422
  	const struct cred *cred = current_cred();
565b9b14e   Oleg Nesterov   coredump: format_...
1423
1424
  	const char *pat_ptr = core_pattern;
  	int ispipe = (*pat_ptr == '|');
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
  	char *out_ptr = corename;
  	char *const out_end = corename + CORENAME_MAX_SIZE;
  	int rc;
  	int pid_in_pattern = 0;
  
  	/* Repeat as long as we have more pattern to process and more output
  	   space */
  	while (*pat_ptr) {
  		if (*pat_ptr != '%') {
  			if (out_ptr == out_end)
  				goto out;
  			*out_ptr++ = *pat_ptr++;
  		} else {
  			switch (*++pat_ptr) {
  			case 0:
  				goto out;
  			/* Double percent, output one percent */
  			case '%':
  				if (out_ptr == out_end)
  					goto out;
  				*out_ptr++ = '%';
  				break;
  			/* pid */
  			case 'p':
  				pid_in_pattern = 1;
  				rc = snprintf(out_ptr, out_end - out_ptr,
b488893a3   Pavel Emelyanov   pid namespaces: c...
1451
  					      "%d", task_tgid_vnr(current));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1452
1453
1454
1455
1456
1457
1458
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
  			/* uid */
  			case 'u':
  				rc = snprintf(out_ptr, out_end - out_ptr,
86a264abe   David Howells   CRED: Wrap curren...
1459
  					      "%d", cred->uid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1460
1461
1462
1463
1464
1465
1466
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
  			/* gid */
  			case 'g':
  				rc = snprintf(out_ptr, out_end - out_ptr,
86a264abe   David Howells   CRED: Wrap curren...
1467
  					      "%d", cred->gid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
  			/* signal that caused the coredump */
  			case 's':
  				rc = snprintf(out_ptr, out_end - out_ptr,
  					      "%ld", signr);
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
  			/* UNIX time of coredump */
  			case 't': {
  				struct timeval tv;
  				do_gettimeofday(&tv);
  				rc = snprintf(out_ptr, out_end - out_ptr,
  					      "%lu", tv.tv_sec);
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
  			}
  			/* hostname */
  			case 'h':
  				down_read(&uts_sem);
  				rc = snprintf(out_ptr, out_end - out_ptr,
e9ff3990f   Serge E. Hallyn   [PATCH] namespace...
1495
  					      "%s", utsname()->nodename);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
  				up_read(&uts_sem);
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
  			/* executable */
  			case 'e':
  				rc = snprintf(out_ptr, out_end - out_ptr,
  					      "%s", current->comm);
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
74aadce98   Neil Horman   core_pattern: all...
1509
1510
1511
  			/* core limit size */
  			case 'c':
  				rc = snprintf(out_ptr, out_end - out_ptr,
d554ed895   Jiri Slaby   fs: use rlimit he...
1512
  					      "%lu", rlimit(RLIMIT_CORE));
74aadce98   Neil Horman   core_pattern: all...
1513
1514
1515
1516
  				if (rc > out_end - out_ptr)
  					goto out;
  				out_ptr += rc;
  				break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
  			default:
  				break;
  			}
  			++pat_ptr;
  		}
  	}
  	/* Backward compatibility with core_uses_pid:
  	 *
  	 * If core_pattern does not include a %p (as is the default)
  	 * and core_uses_pid is set, then .%pid will be appended to
c4bbafda7   Alan Cox   exec.c: fix cored...
1527
  	 * the filename. Do not do this for piped commands. */
6409324b3   Oleg Nesterov   coredump: format_...
1528
  	if (!ispipe && !pid_in_pattern && core_uses_pid) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1529
  		rc = snprintf(out_ptr, out_end - out_ptr,
b488893a3   Pavel Emelyanov   pid namespaces: c...
1530
  			      ".%d", task_tgid_vnr(current));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1531
1532
1533
1534
  		if (rc > out_end - out_ptr)
  			goto out;
  		out_ptr += rc;
  	}
c4bbafda7   Alan Cox   exec.c: fix cored...
1535
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1536
  	*out_ptr = 0;
c4bbafda7   Alan Cox   exec.c: fix cored...
1537
  	return ispipe;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1538
  }
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1539
  static int zap_process(struct task_struct *start, int exit_code)
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1540
1541
  {
  	struct task_struct *t;
8cd9c2491   Oleg Nesterov   coredump: simplif...
1542
  	int nr = 0;
281de339c   Oleg Nesterov   [PATCH] coredump:...
1543

d5f70c00a   Oleg Nesterov   [PATCH] coredump:...
1544
  	start->signal->flags = SIGNAL_GROUP_EXIT;
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1545
  	start->signal->group_exit_code = exit_code;
d5f70c00a   Oleg Nesterov   [PATCH] coredump:...
1546
  	start->signal->group_stop_count = 0;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1547
1548
1549
1550
  
  	t = start;
  	do {
  		if (t != current && t->mm) {
281de339c   Oleg Nesterov   [PATCH] coredump:...
1551
1552
  			sigaddset(&t->pending.signal, SIGKILL);
  			signal_wake_up(t, 1);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1553
  			nr++;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1554
  		}
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1555
  	} while_each_thread(start, t);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1556
1557
  
  	return nr;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1558
  }
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1559
  static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
8cd9c2491   Oleg Nesterov   coredump: simplif...
1560
  				struct core_state *core_state, int exit_code)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1561
1562
  {
  	struct task_struct *g, *p;
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1563
  	unsigned long flags;
8cd9c2491   Oleg Nesterov   coredump: simplif...
1564
  	int nr = -EAGAIN;
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1565
1566
  
  	spin_lock_irq(&tsk->sighand->siglock);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
1567
  	if (!signal_group_exit(tsk->signal)) {
8cd9c2491   Oleg Nesterov   coredump: simplif...
1568
  		mm->core_state = core_state;
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1569
  		nr = zap_process(tsk, exit_code);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1570
  	}
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1571
  	spin_unlock_irq(&tsk->sighand->siglock);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1572
1573
  	if (unlikely(nr < 0))
  		return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1574

8cd9c2491   Oleg Nesterov   coredump: simplif...
1575
  	if (atomic_read(&mm->mm_users) == nr + 1)
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1576
  		goto done;
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1577
1578
  	/*
  	 * We should find and kill all tasks which use this mm, and we should
999d9fc16   Oleg Nesterov   coredump: move mm...
1579
  	 * count them correctly into ->nr_threads. We don't take tasklist
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
  	 * lock, but this is safe wrt:
  	 *
  	 * fork:
  	 *	None of sub-threads can fork after zap_process(leader). All
  	 *	processes which were created before this point should be
  	 *	visible to zap_threads() because copy_process() adds the new
  	 *	process to the tail of init_task.tasks list, and lock/unlock
  	 *	of ->siglock provides a memory barrier.
  	 *
  	 * do_exit:
  	 *	The caller holds mm->mmap_sem. This means that the task which
  	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
  	 *	its ->mm.
  	 *
  	 * de_thread:
  	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
  	 *	we must see either old or new leader, this does not matter.
  	 *	However, it can change p->sighand, so lock_task_sighand(p)
  	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
  	 *	it can't fail.
  	 *
  	 *	Note also that "g" can be the old leader with ->mm == NULL
  	 *	and already unhashed and thus removed from ->thread_group.
  	 *	This is OK, __unhash_process()->list_del_rcu() does not
  	 *	clear the ->next pointer, we will find the new leader via
  	 *	next_thread().
  	 */
7b1c6154f   Oleg Nesterov   [PATCH] coredump:...
1607
  	rcu_read_lock();
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1608
  	for_each_process(g) {
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1609
1610
  		if (g == tsk->group_leader)
  			continue;
15b9f360c   Oleg Nesterov   coredump: zap_thr...
1611
1612
  		if (g->flags & PF_KTHREAD)
  			continue;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1613
1614
1615
  		p = g;
  		do {
  			if (p->mm) {
15b9f360c   Oleg Nesterov   coredump: zap_thr...
1616
  				if (unlikely(p->mm == mm)) {
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1617
  					lock_task_sighand(p, &flags);
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1618
  					nr += zap_process(p, exit_code);
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1619
1620
  					unlock_task_sighand(p, &flags);
  				}
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1621
1622
  				break;
  			}
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1623
  		} while_each_thread(g, p);
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1624
  	}
7b1c6154f   Oleg Nesterov   [PATCH] coredump:...
1625
  	rcu_read_unlock();
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1626
  done:
c5f1cc8c1   Oleg Nesterov   coredump: turn co...
1627
  	atomic_set(&core_state->nr_threads, nr);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1628
  	return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1629
  }
9d5b327bf   Oleg Nesterov   coredump: make mm...
1630
  static int coredump_wait(int exit_code, struct core_state *core_state)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1631
  {
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1632
1633
  	struct task_struct *tsk = current;
  	struct mm_struct *mm = tsk->mm;
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1634
  	struct completion *vfork_done;
269b005a2   Oleg Nesterov   coredump: shift d...
1635
  	int core_waiters = -EBUSY;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1636

9d5b327bf   Oleg Nesterov   coredump: make mm...
1637
  	init_completion(&core_state->startup);
b564daf80   Oleg Nesterov   coredump: constru...
1638
1639
  	core_state->dumper.task = tsk;
  	core_state->dumper.next = NULL;
269b005a2   Oleg Nesterov   coredump: shift d...
1640
1641
1642
1643
  
  	down_write(&mm->mmap_sem);
  	if (!mm->core_state)
  		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
2384f55f8   Oleg Nesterov   [PATCH] coredump_...
1644
  	up_write(&mm->mmap_sem);
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
  	if (unlikely(core_waiters < 0))
  		goto fail;
  
  	/*
  	 * Make sure nobody is waiting for us to release the VM,
  	 * otherwise we can deadlock when we wait on each other
  	 */
  	vfork_done = tsk->vfork_done;
  	if (vfork_done) {
  		tsk->vfork_done = NULL;
  		complete(vfork_done);
  	}
2384f55f8   Oleg Nesterov   [PATCH] coredump_...
1657
  	if (core_waiters)
9d5b327bf   Oleg Nesterov   coredump: make mm...
1658
  		wait_for_completion(&core_state->startup);
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1659
  fail:
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1660
  	return core_waiters;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1661
  }
a94e2d408   Oleg Nesterov   coredump: kill mm...
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
  /*
   * Release the threads queued on @mm's core_state once the dump is over.
   *
   * Walks the list that starts at mm->core_state->dumper.next, clears each
   * entry's ->task and wakes the task it pointed to, then resets
   * mm->core_state to NULL.
   */
  static void coredump_finish(struct mm_struct *mm)
  {
  	struct core_thread *curr, *next;
  	struct task_struct *task;
  
  	next = mm->core_state->dumper.next;
  	while ((curr = next) != NULL) {
  		/* Fetch both fields before ->task is cleared below. */
  		next = curr->next;
  		task = curr->task;
  		/*
  		 * see exit_mm(), curr->task must not see
  		 * ->task == NULL before we read ->next.
  		 */
  		smp_mb();
  		curr->task = NULL;
  		wake_up_process(task);
  	}
  
  	mm->core_state = NULL;
  }
6c5d52382   Kawai, Hidehiro   coredump masking:...
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
  /*
   * set_dumpable converts traditional three-value dumpable to two flags and
   * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
   * these bits are not changed atomically.  So get_dumpable can observe the
   * intermediate state.  To avoid unexpected behavior, make get_dumpable
   * return either the old dumpable value or the new one by paying attention
   * to the order in which the bits are modified.
   *
   * dumpable |   mm->flags (binary)
   * old  new | initial interim  final
   * ---------+-----------------------
   *  0    1  |   00      01      01
   *  0    2  |   00      10(*)   11
   *  1    0  |   01      00      00
   *  1    2  |   01      11      11
   *  2    0  |   11      10(*)   00
   *  2    1  |   11      11      01
   *
   * (*) get_dumpable regards interim value of 10 as 11.
   *
   * A @value other than 0, 1 or 2 leaves mm->flags untouched.
   */
  void set_dumpable(struct mm_struct *mm, int value)
  {
  	switch (value) {
  	case 0:
  		/* Clear MMF_DUMPABLE first, MMF_DUMP_SECURELY second. */
  		clear_bit(MMF_DUMPABLE, &mm->flags);
  		smp_wmb();
  		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
  		break;
  	case 1:
  		/* Set MMF_DUMPABLE first, then clear MMF_DUMP_SECURELY. */
  		set_bit(MMF_DUMPABLE, &mm->flags);
  		smp_wmb();
  		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
  		break;
  	case 2:
  		/* Set MMF_DUMP_SECURELY first, MMF_DUMPABLE second. */
  		set_bit(MMF_DUMP_SECURELY, &mm->flags);
  		smp_wmb();
  		set_bit(MMF_DUMPABLE, &mm->flags);
  		break;
  	}
  }
6c5d52382   Kawai, Hidehiro   coredump masking:...
1722

30736a4d4   Masami Hiramatsu   coredump: pass mm...
1723
  static int __get_dumpable(unsigned long mm_flags)
6c5d52382   Kawai, Hidehiro   coredump masking:...
1724
1725
  {
  	int ret;
30736a4d4   Masami Hiramatsu   coredump: pass mm...
1726
  	ret = mm_flags & MMF_DUMPABLE_MASK;
6c5d52382   Kawai, Hidehiro   coredump masking:...
1727
1728
  	return (ret >= 2) ? 2 : ret;
  }
30736a4d4   Masami Hiramatsu   coredump: pass mm...
1729
1730
1731
1732
  /* Return the dumpable value that __get_dumpable() derives from @mm->flags. */
  int get_dumpable(struct mm_struct *mm)
  {
  	unsigned long mm_flags = mm->flags;
  
  	return __get_dumpable(mm_flags);
  }
61be228a0   Neil Horman   exec: allow do_co...
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
  /*
   * Block until the readers of the pipe behind @file have gone away, or
   * until a signal is pending for the current task.
   *
   * For the duration of the wait the pipe is accounted as having one more
   * reader and one less writer; both counters are restored before returning.
   */
  static void wait_for_dump_helpers(struct file *file)
  {
  	struct pipe_inode_info *pipe = file->f_path.dentry->d_inode->i_pipe;
  
  	pipe_lock(pipe);
  	pipe->readers++;
  	pipe->writers--;
  
  	for (;;) {
  		/* Done once only our own extra reader count is left. */
  		if (!(pipe->readers > 1))
  			break;
  		if (signal_pending(current))
  			break;
  
  		wake_up_interruptible_sync(&pipe->wait);
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  		pipe_wait(pipe);
  	}
  
  	pipe->readers--;
  	pipe->writers++;
  	pipe_unlock(pipe);
  }
898b374af   Neil Horman   exec: replace cal...
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
  /*
   * umh_pipe_setup
   * Helper that customizes the process used to collect the core in
   * userspace.  It creates a pipe, installs the read end as fd 0 (stdin)
   * of the current process and stores the write end in the
   * coredump_params passed through info->data.
   *
   * Returns 0 on success, or the PTR_ERR() of the pipe end that could not
   * be created.
   *
   * Note that it also sets the core limit to 1.  This is a special value
   * that we use to trap recursive core dumps.
   */
  static int umh_pipe_setup(struct subprocess_info *info)
  {
  	struct coredump_params *params = (struct coredump_params *)info->data;
  	struct files_struct *files = current->files;
  	struct file *reader, *writer;
  	struct fdtable *fdt;
  
  	writer = create_write_pipe(0);
  	if (IS_ERR(writer))
  		return PTR_ERR(writer);
  
  	reader = create_read_pipe(writer, 0);
  	if (IS_ERR(reader)) {
  		free_write_pipe(writer);
  		return PTR_ERR(reader);
  	}
  
  	params->file = writer;
  
  	/* Replace whatever is on fd 0 with the read end of the pipe. */
  	sys_close(0);
  	fd_install(0, reader);
  
  	/* Mark fd 0 as open and make sure it is not close-on-exec. */
  	spin_lock(&files->file_lock);
  	fdt = files_fdtable(files);
  	FD_SET(0, fdt->open_fds);
  	FD_CLR(0, fdt->close_on_exec);
  	spin_unlock(&files->file_lock);
  
  	/* and disallow core files too */
  	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
  
  	return 0;
  }
8cd3ac3ac   WANG Cong   fs/exec.c: make d...
1797
  void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1798
  {
9d5b327bf   Oleg Nesterov   coredump: make mm...
1799
  	struct core_state core_state;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1800
1801
1802
  	char corename[CORENAME_MAX_SIZE + 1];
  	struct mm_struct *mm = current->mm;
  	struct linux_binfmt * binfmt;
d84f4f992   David Howells   CRED: Inaugurate ...
1803
1804
  	const struct cred *old_cred;
  	struct cred *cred;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1805
  	int retval = 0;
d6e711448   Alan Cox   [PATCH] setuid co...
1806
  	int flag = 0;
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1807
  	int ispipe;
a293980c2   Neil Horman   exec: let do_core...
1808
  	static atomic_t core_dump_count = ATOMIC_INIT(0);
f6151dfea   Masami Hiramatsu   mm: introduce cor...
1809
1810
1811
  	struct coredump_params cprm = {
  		.signr = signr,
  		.regs = regs,
d554ed895   Jiri Slaby   fs: use rlimit he...
1812
  		.limit = rlimit(RLIMIT_CORE),
30736a4d4   Masami Hiramatsu   coredump: pass mm...
1813
1814
1815
1816
1817
1818
  		/*
  		 * We must use the same mm->flags while dumping core to avoid
  		 * inconsistency of bit flags, since this flag is not protected
  		 * by any locks.
  		 */
  		.mm_flags = mm->flags,
f6151dfea   Masami Hiramatsu   mm: introduce cor...
1819
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1820

0a4ff8c25   Steve Grubb   [PATCH] Abnormal ...
1821
  	audit_core_dumps(signr);
801460d0c   Hiroshi Shimamoto   task_struct clean...
1822
  	binfmt = mm->binfmt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1823
1824
  	if (!binfmt || !binfmt->core_dump)
  		goto fail;
269b005a2   Oleg Nesterov   coredump: shift d...
1825
1826
  	if (!__get_dumpable(cprm.mm_flags))
  		goto fail;
d84f4f992   David Howells   CRED: Inaugurate ...
1827
1828
  
  	cred = prepare_creds();
5e43aef53   Oleg Nesterov   coredump: factor ...
1829
  	if (!cred)
d84f4f992   David Howells   CRED: Inaugurate ...
1830
  		goto fail;
d6e711448   Alan Cox   [PATCH] setuid co...
1831
1832
1833
1834
1835
  	/*
  	 *	We cannot trust fsuid as being the "true" uid of the
  	 *	process nor do we know its entire history. We only know it
  	 *	was tainted so we dump it as root in mode 2.
  	 */
30736a4d4   Masami Hiramatsu   coredump: pass mm...
1836
1837
  	if (__get_dumpable(cprm.mm_flags) == 2) {
  		/* Setuid core dump mode */
d6e711448   Alan Cox   [PATCH] setuid co...
1838
  		flag = O_EXCL;		/* Stop rewrite attacks */
d84f4f992   David Howells   CRED: Inaugurate ...
1839
  		cred->fsuid = 0;	/* Dump root private */
d6e711448   Alan Cox   [PATCH] setuid co...
1840
  	}
1291cf416   Oleg Nesterov   [PATCH] fix de_th...
1841

9d5b327bf   Oleg Nesterov   coredump: make mm...
1842
  	retval = coredump_wait(exit_code, &core_state);
5e43aef53   Oleg Nesterov   coredump: factor ...
1843
1844
  	if (retval < 0)
  		goto fail_creds;
d84f4f992   David Howells   CRED: Inaugurate ...
1845
1846
  
  	old_cred = override_creds(cred);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1847
1848
1849
1850
1851
  
  	/*
  	 * Clear any false indication of pending signals that might
  	 * be seen by the filesystem code called to write the core file.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1852
  	clear_thread_flag(TIF_SIGPENDING);
6409324b3   Oleg Nesterov   coredump: format_...
1853
  	ispipe = format_corename(corename, signr);
725eae32d   Neil Horman   exec: make do_cor...
1854

c4bbafda7   Alan Cox   exec.c: fix cored...
1855
   	if (ispipe) {
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1856
1857
  		int dump_count;
  		char **helper_argv;
898b374af   Neil Horman   exec: replace cal...
1858
  		if (cprm.limit == 1) {
725eae32d   Neil Horman   exec: make do_cor...
1859
1860
1861
  			/*
  			 * Normally core limits are irrelevant to pipes, since
  			 * we're not writing to the file system, but we use
898b374af   Neil Horman   exec: replace cal...
1862
1863
  			 * cprm.limit of 1 here as a speacial value. Any
  			 * non-1 limit gets set to RLIM_INFINITY below, but
725eae32d   Neil Horman   exec: make do_cor...
1864
1865
  			 * a limit of 0 skips the dump.  This is a consistent
  			 * way to catch recursive crashes.  We can still crash
898b374af   Neil Horman   exec: replace cal...
1866
  			 * if the core_pattern binary sets RLIM_CORE =  !1
725eae32d   Neil Horman   exec: make do_cor...
1867
1868
1869
1870
1871
1872
1873
  			 * but it runs as root, and can do lots of stupid things
  			 * Note that we use task_tgid_vnr here to grab the pid
  			 * of the process group leader.  That way we get the
  			 * right pid if a thread in a multi-threaded
  			 * core_pattern process dies.
  			 */
  			printk(KERN_WARNING
898b374af   Neil Horman   exec: replace cal...
1874
1875
  				"Process %d(%s) has RLIMIT_CORE set to 1
  ",
725eae32d   Neil Horman   exec: make do_cor...
1876
1877
1878
1879
1880
  				task_tgid_vnr(current), current->comm);
  			printk(KERN_WARNING "Aborting core
  ");
  			goto fail_unlock;
  		}
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1881
  		cprm.limit = RLIM_INFINITY;
725eae32d   Neil Horman   exec: make do_cor...
1882

a293980c2   Neil Horman   exec: let do_core...
1883
1884
1885
1886
1887
1888
1889
1890
1891
  		dump_count = atomic_inc_return(&core_dump_count);
  		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
  			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit
  ",
  			       task_tgid_vnr(current), current->comm);
  			printk(KERN_WARNING "Skipping core dump
  ");
  			goto fail_dropcount;
  		}
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1892
  		helper_argv = argv_split(GFP_KERNEL, corename+1, NULL);
350eaf791   Tetsuo Handa   do_coredump(): ch...
1893
1894
1895
1896
  		if (!helper_argv) {
  			printk(KERN_WARNING "%s failed to allocate memory
  ",
  			       __func__);
a293980c2   Neil Horman   exec: let do_core...
1897
  			goto fail_dropcount;
350eaf791   Tetsuo Handa   do_coredump(): ch...
1898
  		}
323211371   Neil Horman   core_pattern: fix...
1899

d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1900
1901
1902
1903
1904
  		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
  					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
  					NULL, &cprm);
  		argv_free(helper_argv);
  		if (retval) {
d025c9db7   Andi Kleen   [PATCH] Support p...
1905
1906
1907
   			printk(KERN_INFO "Core dump to %s pipe failed
  ",
  			       corename);
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1908
  			goto close_fail;
d025c9db7   Andi Kleen   [PATCH] Support p...
1909
   		}
c71354112   Oleg Nesterov   coredump: factor ...
1910
1911
1912
1913
1914
  	} else {
  		struct inode *inode;
  
  		if (cprm.limit < binfmt->min_coredump)
  			goto fail_unlock;
f6151dfea   Masami Hiramatsu   mm: introduce cor...
1915
  		cprm.file = filp_open(corename,
6d4df677f   Alexey Dobriyan   [PATCH] do_coredu...
1916
1917
  				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
  				 0600);
c71354112   Oleg Nesterov   coredump: factor ...
1918
1919
  		if (IS_ERR(cprm.file))
  			goto fail_unlock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1920

c71354112   Oleg Nesterov   coredump: factor ...
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
  		inode = cprm.file->f_path.dentry->d_inode;
  		if (inode->i_nlink > 1)
  			goto close_fail;
  		if (d_unhashed(cprm.file->f_path.dentry))
  			goto close_fail;
  		/*
  		 * AK: actually i see no reason to not allow this for named
  		 * pipes etc, but keep the previous behaviour for now.
  		 */
  		if (!S_ISREG(inode->i_mode))
  			goto close_fail;
  		/*
  		 * Dont allow local users get cute and trick others to coredump
  		 * into their pre-created files.
  		 */
  		if (inode->i_uid != current_fsuid())
  			goto close_fail;
  		if (!cprm.file->f_op || !cprm.file->f_op->write)
  			goto close_fail;
  		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
  			goto close_fail;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1943

c71354112   Oleg Nesterov   coredump: factor ...
1944
  	retval = binfmt->core_dump(&cprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1945
1946
  	if (retval)
  		current->signal->group_exit_code |= 0x80;
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1947

61be228a0   Neil Horman   exec: allow do_co...
1948
  	if (ispipe && core_pipe_limit)
f6151dfea   Masami Hiramatsu   mm: introduce cor...
1949
  		wait_for_dump_helpers(cprm.file);
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1950
1951
1952
  close_fail:
  	if (cprm.file)
  		filp_close(cprm.file, NULL);
a293980c2   Neil Horman   exec: let do_core...
1953
  fail_dropcount:
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
1954
  	if (ispipe)
a293980c2   Neil Horman   exec: let do_core...
1955
  		atomic_dec(&core_dump_count);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1956
  fail_unlock:
5e43aef53   Oleg Nesterov   coredump: factor ...
1957
  	coredump_finish(mm);
d84f4f992   David Howells   CRED: Inaugurate ...
1958
  	revert_creds(old_cred);
5e43aef53   Oleg Nesterov   coredump: factor ...
1959
  fail_creds:
d84f4f992   David Howells   CRED: Inaugurate ...
1960
  	put_cred(cred);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1961
  fail:
8cd3ac3ac   WANG Cong   fs/exec.c: make d...
1962
  	return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1963
  }