Blame view

fs/exec.c 52.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
  /*
   *  linux/fs/exec.c
   *
   *  Copyright (C) 1991, 1992  Linus Torvalds
   */
  
  /*
   * #!-checking implemented by tytso.
   */
  /*
   * Demand-loading implemented 01.12.91 - no need to read anything but
   * the header into memory. The inode of the executable is put into
   * "current->executable", and page faults do the actual loading. Clean.
   *
   * Once more I can proudly say that linux stood up to being changed: it
   * was less than 2 hours work to get demand-loading completely implemented.
   *
   * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
   * current->executable is only used by the procfs.  This allows a dispatch
   * table to check for several different types  of binary formats.  We keep
   * trying until we recognize the file or we run out of supported binary
   * formats. 
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
  #include <linux/slab.h>
  #include <linux/file.h>
9f3acc314   Al Viro   [PATCH] split lin...
26
  #include <linux/fdtable.h>
ba92a43db   Hugh Dickins   exec: remove some...
27
  #include <linux/mm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
28
29
  #include <linux/stat.h>
  #include <linux/fcntl.h>
ba92a43db   Hugh Dickins   exec: remove some...
30
  #include <linux/swap.h>
74aadce98   Neil Horman   core_pattern: all...
31
  #include <linux/string.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
32
  #include <linux/init.h>
ca5b172bd   Hugh Dickins   exec: include pag...
33
  #include <linux/pagemap.h>
cdd6c482c   Ingo Molnar   perf: Do the big ...
34
  #include <linux/perf_event.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
36
37
38
39
  #include <linux/highmem.h>
  #include <linux/spinlock.h>
  #include <linux/key.h>
  #include <linux/personality.h>
  #include <linux/binfmts.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
  #include <linux/utsname.h>
84d737866   Sukadev Bhattiprolu   [PATCH] add child...
41
  #include <linux/pid_namespace.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
  #include <linux/module.h>
  #include <linux/namei.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44
45
46
  #include <linux/mount.h>
  #include <linux/security.h>
  #include <linux/syscalls.h>
8f0ab5147   Jay Lan   [PATCH] csa: conv...
47
  #include <linux/tsacct_kern.h>
9f46080c4   Matt Helsley   [PATCH] Process E...
48
  #include <linux/cn_proc.h>
473ae30bc   Al Viro   [PATCH] execve ar...
49
  #include <linux/audit.h>
6341c393f   Roland McGrath   tracehook: exec
50
  #include <linux/tracehook.h>
5f4123be3   Johannes Berg   remove CONFIG_KMO...
51
  #include <linux/kmod.h>
6110e3abb   Eric Paris   sys_execve and sy...
52
  #include <linux/fsnotify.h>
5ad4e53bd   Al Viro   Get rid of indire...
53
  #include <linux/fs_struct.h>
61be228a0   Neil Horman   exec: allow do_co...
54
  #include <linux/pipe_fs_i.h>
3d5992d2a   Ying Han   oom: add per-mm o...
55
  #include <linux/oom.h>
0e028465d   Oleg Nesterov   exec: unify do_ex...
56
  #include <linux/compat.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
57
58
59
  
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
b6a2fea39   Ollie Wild   mm: variable leng...
60
  #include <asm/tlb.h>
a6f76f23d   David Howells   CRED: Make execve...
61
  #include "internal.h"
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
63
  int core_uses_pid;
71ce92f3f   Dan Aloni   make sysctl/kerne...
64
  char core_pattern[CORENAME_MAX_SIZE] = "core";
a293980c2   Neil Horman   exec: let do_core...
65
  unsigned int core_pipe_limit;
d6e711448   Alan Cox   [PATCH] setuid co...
66
  int suid_dumpable = 0;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
67
68
69
70
71
  struct core_name {
  	char *corename;
  	int used, size;
  };
  static atomic_t call_count = ATOMIC_INIT(1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
72
  /* The maximal length of core_pattern is also specified in sysctl.c */
e4dc1b14d   Alexey Dobriyan   Use list_head in ...
73
  static LIST_HEAD(formats);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
74
  static DEFINE_RWLOCK(binfmt_lock);
74641f584   Ivan Kokshaysky   alpha: binfmt_aou...
75
  int __register_binfmt(struct linux_binfmt * fmt, int insert)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
76
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
78
  	if (!fmt)
  		return -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79
  	write_lock(&binfmt_lock);
74641f584   Ivan Kokshaysky   alpha: binfmt_aou...
80
81
  	insert ? list_add(&fmt->lh, &formats) :
  		 list_add_tail(&fmt->lh, &formats);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
83
84
  	write_unlock(&binfmt_lock);
  	return 0;	
  }
74641f584   Ivan Kokshaysky   alpha: binfmt_aou...
85
  EXPORT_SYMBOL(__register_binfmt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86

f6b450d48   Alexey Dobriyan   Make unregister_b...
87
  void unregister_binfmt(struct linux_binfmt * fmt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
  	write_lock(&binfmt_lock);
e4dc1b14d   Alexey Dobriyan   Use list_head in ...
90
  	list_del(&fmt->lh);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
91
  	write_unlock(&binfmt_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  }
  
  EXPORT_SYMBOL(unregister_binfmt);
  
  /*
   * Release the module reference held on a binary format handler by calling
   * module_put() on fmt->module.
   */
  static inline void put_binfmt(struct linux_binfmt * fmt)
  {
  	module_put(fmt->module);
  }
  
  /*
   * Note that a shared library must be both readable and executable due to
   * security reasons.
   *
   * Also note that we take the address to load from from the file itself.
   */
1e7bfb213   Heiko Carstens   [CVE-2009-0029] S...
107
  /*
   * uselib(): open @library and offer it to each registered binary format's
   * load_shlib() handler until one returns something other than -ENOEXEC.
   *
   * Returns the handler's result, or a negative errno:
   *   - the getname()/do_filp_open() error if the path cannot be opened,
   *   - -EINVAL if the file is not a regular file,
   *   - -EACCES if the file lives on a MNT_NOEXEC mount,
   *   - -ENOEXEC if the file has no f_op or no handler accepts it.
   */
  SYSCALL_DEFINE1(uselib, const char __user *, library)
  {
  	static const struct open_flags uselib_flags = {
  		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
  		.acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
  		.intent = LOOKUP_OPEN
  	};
  	struct linux_binfmt *fmt;
  	struct file *file;
  	char *name;
  	int error;

  	name = getname(library);
  	if (IS_ERR(name)) {
  		error = PTR_ERR(name);
  		return error;
  	}

  	file = do_filp_open(AT_FDCWD, name, &uselib_flags, LOOKUP_FOLLOW);
  	putname(name);
  	if (IS_ERR(file)) {
  		error = PTR_ERR(file);
  		return error;
  	}

  	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
  		error = -EINVAL;
  		goto exit;
  	}

  	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
  		error = -EACCES;
  		goto exit;
  	}

  	fsnotify_open(file);

  	error = -ENOEXEC;
  	if (!file->f_op)
  		goto exit;

  	read_lock(&binfmt_lock);
  	list_for_each_entry(fmt, &formats, lh) {
  		if (!fmt->load_shlib || !try_module_get(fmt->module))
  			continue;

  		/*
  		 * The handler is called without binfmt_lock held; the module
  		 * reference taken above is dropped once the lock is re-taken.
  		 */
  		read_unlock(&binfmt_lock);
  		error = fmt->load_shlib(file);
  		read_lock(&binfmt_lock);
  		put_binfmt(fmt);

  		if (error != -ENOEXEC)
  			break;
  	}
  	read_unlock(&binfmt_lock);

  exit:
  	fput(file);
  	return error;
  }
b6a2fea39   Ollie Wild   mm: variable leng...
159
  #ifdef CONFIG_MMU
ae6b585ee   Oleg Nesterov   exec: document ac...
160
161
162
163
164
165
  /*
   * The nascent bprm->mm is not visible until exec_mmap() but it can
   * use a lot of memory, account these pages in current->mm temporarily
   * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
   * change the counter back via acct_arg_size(0).
   */
0e028465d   Oleg Nesterov   exec: unify do_ex...
166
  static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
3c77f8457   Oleg Nesterov   exec: make argv/e...
167
168
169
170
171
172
173
174
  {
  	struct mm_struct *mm = current->mm;
  	long diff = (long)(pages - bprm->vma_pages);
  
  	if (!mm || !diff)
  		return;
  
  	bprm->vma_pages = pages;
3c77f8457   Oleg Nesterov   exec: make argv/e...
175
  	add_mm_counter(mm, MM_ANONPAGES, diff);
3c77f8457   Oleg Nesterov   exec: make argv/e...
176
  }
0e028465d   Oleg Nesterov   exec: unify do_ex...
177
  static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
b6a2fea39   Ollie Wild   mm: variable leng...
178
179
180
181
182
183
184
  		int write)
  {
  	struct page *page;
  	int ret;
  
  #ifdef CONFIG_STACK_GROWSUP
  	if (write) {
d05f3169c   Michal Hocko   mm: make expand_d...
185
  		ret = expand_downwards(bprm->vma, pos);
b6a2fea39   Ollie Wild   mm: variable leng...
186
187
188
189
190
191
192
193
194
195
  		if (ret < 0)
  			return NULL;
  	}
  #endif
  	ret = get_user_pages(current, bprm->mm, pos,
  			1, write, 1, &page, NULL);
  	if (ret <= 0)
  		return NULL;
  
  	if (write) {
b6a2fea39   Ollie Wild   mm: variable leng...
196
  		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
a64e715fc   Linus Torvalds   Allow ARG_MAX exe...
197
  		struct rlimit *rlim;
3c77f8457   Oleg Nesterov   exec: make argv/e...
198
  		acct_arg_size(bprm, size / PAGE_SIZE);
a64e715fc   Linus Torvalds   Allow ARG_MAX exe...
199
200
201
202
203
204
  		/*
  		 * We've historically supported up to 32 pages (ARG_MAX)
  		 * of argument strings even with small stacks
  		 */
  		if (size <= ARG_MAX)
  			return page;
b6a2fea39   Ollie Wild   mm: variable leng...
205
206
207
208
209
210
211
212
  
  		/*
  		 * Limit to 1/4-th the stack size for the argv+env strings.
  		 * This ensures that:
  		 *  - the remaining binfmt code will not run out of stack space,
  		 *  - the program will have a reasonable amount of stack left
  		 *    to work from.
  		 */
a64e715fc   Linus Torvalds   Allow ARG_MAX exe...
213
  		rlim = current->signal->rlim;
d554ed895   Jiri Slaby   fs: use rlimit he...
214
  		if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
b6a2fea39   Ollie Wild   mm: variable leng...
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
  			put_page(page);
  			return NULL;
  		}
  	}
  
  	return page;
  }
  
  /*
   * CONFIG_MMU variant: release a page returned by get_arg_page() by
   * calling put_page() on it.
   */
  static void put_arg_page(struct page *page)
  {
  	put_page(page);
  }
  
  /* CONFIG_MMU variant: intentionally empty, nothing is freed here. */
  static void free_arg_page(struct linux_binprm *bprm, int i)
  {
  }
  
  /* CONFIG_MMU variant: intentionally empty, nothing is freed here. */
  static void free_arg_pages(struct linux_binprm *bprm)
  {
  }
  
  /*
   * CONFIG_MMU variant: call flush_cache_page() for @page, which is mapped
   * at address @pos within bprm->vma.
   */
  static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
  		struct page *page)
  {
  	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
  }
  
  static int __bprm_mm_init(struct linux_binprm *bprm)
  {
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
244
  	int err;
b6a2fea39   Ollie Wild   mm: variable leng...
245
246
247
248
249
  	struct vm_area_struct *vma = NULL;
  	struct mm_struct *mm = bprm->mm;
  
  	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
  	if (!vma)
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
250
  		return -ENOMEM;
b6a2fea39   Ollie Wild   mm: variable leng...
251
252
253
254
255
256
257
258
259
260
  
  	down_write(&mm->mmap_sem);
  	vma->vm_mm = mm;
  
  	/*
  	 * Place the stack at the largest stack address the architecture
  	 * supports. Later, we'll move this to an appropriate place. We don't
  	 * use STACK_TOP because that can depend on attributes which aren't
  	 * configured yet.
  	 */
aacb3d17a   Michal Hocko   fs/exec.c: use BU...
261
  	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
b6a2fea39   Ollie Wild   mm: variable leng...
262
263
  	vma->vm_end = STACK_TOP_MAX;
  	vma->vm_start = vma->vm_end - PAGE_SIZE;
a8bef8ff6   Mel Gorman   mm: migration: av...
264
  	vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
3ed75eb8f   Coly Li   setup vma->vm_pag...
265
  	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
5beb49305   Rik van Riel   mm: change anon_v...
266
  	INIT_LIST_HEAD(&vma->anon_vma_chain);
462e635e5   Tavis Ormandy   install_special_m...
267
268
269
270
  
  	err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
  	if (err)
  		goto err;
b6a2fea39   Ollie Wild   mm: variable leng...
271
  	err = insert_vm_struct(mm, vma);
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
272
  	if (err)
b6a2fea39   Ollie Wild   mm: variable leng...
273
  		goto err;
b6a2fea39   Ollie Wild   mm: variable leng...
274
275
276
  
  	mm->stack_vm = mm->total_vm = 1;
  	up_write(&mm->mmap_sem);
b6a2fea39   Ollie Wild   mm: variable leng...
277
  	bprm->p = vma->vm_end - sizeof(void *);
b6a2fea39   Ollie Wild   mm: variable leng...
278
  	return 0;
b6a2fea39   Ollie Wild   mm: variable leng...
279
  err:
eaccbfa56   Luiz Fernando N. Capitulino   fs/exec.c:__bprm_...
280
281
282
  	up_write(&mm->mmap_sem);
  	bprm->vma = NULL;
  	kmem_cache_free(vm_area_cachep, vma);
b6a2fea39   Ollie Wild   mm: variable leng...
283
284
285
286
287
288
289
290
291
  	return err;
  }
  
  /*
   * CONFIG_MMU variant: an argument string is acceptable when its length
   * does not exceed MAX_ARG_STRLEN.  @bprm is not consulted.
   */
  static bool valid_arg_len(struct linux_binprm *bprm, long len)
  {
  	return len <= MAX_ARG_STRLEN;
  }
  
  #else
0e028465d   Oleg Nesterov   exec: unify do_ex...
292
  /* !CONFIG_MMU variant: intentionally empty, no accounting is performed. */
  static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
  {
  }
0e028465d   Oleg Nesterov   exec: unify do_ex...
295
  static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
b6a2fea39   Ollie Wild   mm: variable leng...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
  		int write)
  {
  	struct page *page;
  
  	page = bprm->page[pos / PAGE_SIZE];
  	if (!page && write) {
  		page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
  		if (!page)
  			return NULL;
  		bprm->page[pos / PAGE_SIZE] = page;
  	}
  
  	return page;
  }
  
  /* !CONFIG_MMU variant: intentionally empty, no reference is dropped here. */
  static void put_arg_page(struct page *page)
  {
  }
  
  /*
   * !CONFIG_MMU variant: free the page held in bprm->page[i], if any, and
   * clear the slot.
   */
  static void free_arg_page(struct linux_binprm *bprm, int i)
  {
  	struct page *page = bprm->page[i];

  	if (!page)
  		return;

  	__free_page(page);
  	bprm->page[i] = NULL;
  }
  
  /* !CONFIG_MMU variant: free every slot of bprm->page[], lowest index first. */
  static void free_arg_pages(struct linux_binprm *bprm)
  {
  	int slot = 0;

  	while (slot < MAX_ARG_PAGES) {
  		free_arg_page(bprm, slot);
  		slot++;
  	}
  }
  
  /* !CONFIG_MMU variant: intentionally empty, no cache flush is performed. */
  static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
  		struct page *page)
  {
  }
  
  /*
   * !CONFIG_MMU variant: point bprm->p at the last pointer-sized slot of a
   * MAX_ARG_PAGES-page argument area.  Always returns 0.
   */
  static int __bprm_mm_init(struct linux_binprm *bprm)
  {
  	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
  	return 0;
  }
  
  /*
   * !CONFIG_MMU variant: an argument string is acceptable when its length
   * does not exceed bprm->p.
   */
  static bool valid_arg_len(struct linux_binprm *bprm, long len)
  {
  	return len <= bprm->p;
  }
  
  #endif /* CONFIG_MMU */
  
  /*
   * Allocate a fresh mm_struct for the program being exec'ed and give it a
   * temporary stack through __bprm_mm_init().  Not enough context is available
   * at this point to choose the final stack flags, permissions and offset, so
   * temporary values are used; setup_arg_pages() updates them later.
   *
   * Returns 0 on success.  On failure returns -ENOMEM or the error from
   * init_new_context()/__bprm_mm_init(), with bprm->mm left NULL.
   */
  int bprm_mm_init(struct linux_binprm *bprm)
  {
  	struct mm_struct *new_mm;
  	int err;

  	new_mm = mm_alloc();
  	bprm->mm = new_mm;
  	if (new_mm == NULL)
  		return -ENOMEM;

  	err = init_new_context(current, new_mm);
  	if (err == 0)
  		err = __bprm_mm_init(bprm);
  	if (err == 0)
  		return 0;

  	/* Undo the allocation; the caller must not see a half-built mm. */
  	bprm->mm = NULL;
  	mmdrop(new_mm);
  	return err;
  }
ba2d01629   Oleg Nesterov   exec: introduce s...
383
  struct user_arg_ptr {
0e028465d   Oleg Nesterov   exec: unify do_ex...
384
385
386
387
388
389
390
391
392
  #ifdef CONFIG_COMPAT
  	bool is_compat;
  #endif
  	union {
  		const char __user *const __user *native;
  #ifdef CONFIG_COMPAT
  		compat_uptr_t __user *compat;
  #endif
  	} ptr;
ba2d01629   Oleg Nesterov   exec: introduce s...
393
394
395
  };
  
  /*
   * Fetch entry @nr of the user-space pointer array described by @argv.
   *
   * Returns the user pointer stored in that entry (which may be NULL), or
   * ERR_PTR(-EFAULT) if the entry cannot be read with get_user().
   */
  static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
  {
  	const char __user *uptr;

  #ifdef CONFIG_COMPAT
  	if (unlikely(argv.is_compat)) {
  		compat_uptr_t raw;

  		if (get_user(raw, &argv.ptr.compat[nr]) != 0)
  			return ERR_PTR(-EFAULT);

  		return compat_ptr(raw);
  	}
  #endif

  	if (get_user(uptr, &argv.ptr.native[nr]) != 0)
  		return ERR_PTR(-EFAULT);

  	return uptr;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
414
415
416
  /*
   * count() counts the number of strings in array ARGV.
   */
ba2d01629   Oleg Nesterov   exec: introduce s...
417
  static int count(struct user_arg_ptr argv, int max)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418
419
  {
  	int i = 0;
0e028465d   Oleg Nesterov   exec: unify do_ex...
420
  	if (argv.ptr.native != NULL) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
  		for (;;) {
1d1dbf813   Oleg Nesterov   exec: introduce g...
422
  			const char __user *p = get_user_arg_ptr(argv, i);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
423

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
424
425
  			if (!p)
  				break;
1d1dbf813   Oleg Nesterov   exec: introduce g...
426
427
428
  
  			if (IS_ERR(p))
  				return -EFAULT;
362e6663e   Jason Baron   exec.c, compat.c:...
429
  			if (i++ >= max)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
430
  				return -E2BIG;
9aea5a65a   Roland McGrath   execve: make resp...
431
432
433
  
  			if (fatal_signal_pending(current))
  				return -ERESTARTNOHAND;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
434
435
436
437
438
439
440
  			cond_resched();
  		}
  	}
  	return i;
  }
  
  /*
b6a2fea39   Ollie Wild   mm: variable leng...
441
442
443
   * 'copy_strings()' copies argument/environment strings from the old
   * process's memory to the new process's stack.  The call to get_user_pages()
   * ensures the destination page is created and not swapped out.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
   */
ba2d01629   Oleg Nesterov   exec: introduce s...
445
  static int copy_strings(int argc, struct user_arg_ptr argv,
75c96f858   Adrian Bunk   [PATCH] make some...
446
  			struct linux_binprm *bprm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
447
448
449
  {
  	struct page *kmapped_page = NULL;
  	char *kaddr = NULL;
b6a2fea39   Ollie Wild   mm: variable leng...
450
  	unsigned long kpos = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
451
452
453
  	int ret;
  
  	while (argc-- > 0) {
d7627467b   David Howells   Make do_execve() ...
454
  		const char __user *str;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455
456
  		int len;
  		unsigned long pos;
1d1dbf813   Oleg Nesterov   exec: introduce g...
457
458
459
  		ret = -EFAULT;
  		str = get_user_arg_ptr(argv, argc);
  		if (IS_ERR(str))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
460
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461

1d1dbf813   Oleg Nesterov   exec: introduce g...
462
463
464
465
466
467
  		len = strnlen_user(str, MAX_ARG_STRLEN);
  		if (!len)
  			goto out;
  
  		ret = -E2BIG;
  		if (!valid_arg_len(bprm, len))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
468
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
469

b6a2fea39   Ollie Wild   mm: variable leng...
470
  		/* We're going to work our way backwords. */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
  		pos = bprm->p;
b6a2fea39   Ollie Wild   mm: variable leng...
472
473
  		str += len;
  		bprm->p -= len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
475
  
  		while (len > 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
476
  			int offset, bytes_to_copy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477

9aea5a65a   Roland McGrath   execve: make resp...
478
479
480
481
  			if (fatal_signal_pending(current)) {
  				ret = -ERESTARTNOHAND;
  				goto out;
  			}
7993bc1f4   Roland McGrath   execve: improve i...
482
  			cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
483
  			offset = pos % PAGE_SIZE;
b6a2fea39   Ollie Wild   mm: variable leng...
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
  			if (offset == 0)
  				offset = PAGE_SIZE;
  
  			bytes_to_copy = offset;
  			if (bytes_to_copy > len)
  				bytes_to_copy = len;
  
  			offset -= bytes_to_copy;
  			pos -= bytes_to_copy;
  			str -= bytes_to_copy;
  			len -= bytes_to_copy;
  
  			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
  				struct page *page;
  
  				page = get_arg_page(bprm, pos, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
500
  				if (!page) {
b6a2fea39   Ollie Wild   mm: variable leng...
501
  					ret = -E2BIG;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
502
503
  					goto out;
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504

b6a2fea39   Ollie Wild   mm: variable leng...
505
506
  				if (kmapped_page) {
  					flush_kernel_dcache_page(kmapped_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
507
  					kunmap(kmapped_page);
b6a2fea39   Ollie Wild   mm: variable leng...
508
509
  					put_arg_page(kmapped_page);
  				}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
511
  				kmapped_page = page;
  				kaddr = kmap(kmapped_page);
b6a2fea39   Ollie Wild   mm: variable leng...
512
513
  				kpos = pos & PAGE_MASK;
  				flush_arg_page(bprm, kpos, kmapped_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
514
  			}
b6a2fea39   Ollie Wild   mm: variable leng...
515
  			if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
517
518
  				ret = -EFAULT;
  				goto out;
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
519
520
521
522
  		}
  	}
  	ret = 0;
  out:
b6a2fea39   Ollie Wild   mm: variable leng...
523
524
  	if (kmapped_page) {
  		flush_kernel_dcache_page(kmapped_page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
525
  		kunmap(kmapped_page);
b6a2fea39   Ollie Wild   mm: variable leng...
526
527
  		put_arg_page(kmapped_page);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
528
529
530
531
532
533
  	return ret;
  }
  
  /*
   * Like copy_strings, but get argv and its values from kernel memory.
   */
ba2d01629   Oleg Nesterov   exec: introduce s...
534
  int copy_strings_kernel(int argc, const char *const *__argv,
d7627467b   David Howells   Make do_execve() ...
535
  			struct linux_binprm *bprm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
536
537
538
  {
  	int r;
  	mm_segment_t oldfs = get_fs();
ba2d01629   Oleg Nesterov   exec: introduce s...
539
  	struct user_arg_ptr argv = {
0e028465d   Oleg Nesterov   exec: unify do_ex...
540
  		.ptr.native = (const char __user *const  __user *)__argv,
ba2d01629   Oleg Nesterov   exec: introduce s...
541
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542
  	set_fs(KERNEL_DS);
ba2d01629   Oleg Nesterov   exec: introduce s...
543
  	r = copy_strings(argc, argv, bprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
544
  	set_fs(oldfs);
ba2d01629   Oleg Nesterov   exec: introduce s...
545

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
547
  	return r;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
548
549
550
  EXPORT_SYMBOL(copy_strings_kernel);
  
  #ifdef CONFIG_MMU
b6a2fea39   Ollie Wild   mm: variable leng...
551

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
552
  /*
b6a2fea39   Ollie Wild   mm: variable leng...
553
554
555
   * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
   * the binfmt code determines where the new stack should reside, we shift it to
   * its final location.  The process proceeds as follows:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
556
   *
b6a2fea39   Ollie Wild   mm: variable leng...
557
558
559
560
561
562
   * 1) Use shift to calculate the new vma endpoints.
   * 2) Extend vma to cover both the old and new ranges.  This ensures the
   *    arguments passed to subsequent functions are consistent.
   * 3) Move vma's page tables to the new range.
   * 4) Free up any cleared pgd range.
   * 5) Shrink the vma to cover only the new range.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
563
   */
b6a2fea39   Ollie Wild   mm: variable leng...
564
  static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
565
566
  {
  	struct mm_struct *mm = vma->vm_mm;
b6a2fea39   Ollie Wild   mm: variable leng...
567
568
569
570
571
  	unsigned long old_start = vma->vm_start;
  	unsigned long old_end = vma->vm_end;
  	unsigned long length = old_end - old_start;
  	unsigned long new_start = old_start - shift;
  	unsigned long new_end = old_end - shift;
d16dfc550   Peter Zijlstra   mm: mmu_gather re...
572
  	struct mmu_gather tlb;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573

b6a2fea39   Ollie Wild   mm: variable leng...
574
  	BUG_ON(new_start > new_end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575

b6a2fea39   Ollie Wild   mm: variable leng...
576
577
578
579
580
581
582
583
584
585
  	/*
  	 * ensure there are no vmas between where we want to go
  	 * and where we are
  	 */
  	if (vma != find_vma(mm, new_start))
  		return -EFAULT;
  
  	/*
  	 * cover the whole range: [new_start, old_end)
  	 */
5beb49305   Rik van Riel   mm: change anon_v...
586
587
  	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
  		return -ENOMEM;
b6a2fea39   Ollie Wild   mm: variable leng...
588
589
590
591
592
593
594
595
596
597
  
  	/*
  	 * move the page tables downwards, on failure we rely on
  	 * process cleanup to remove whatever mess we made.
  	 */
  	if (length != move_page_tables(vma, old_start,
  				       vma, new_start, length))
  		return -ENOMEM;
  
  	lru_add_drain();
d16dfc550   Peter Zijlstra   mm: mmu_gather re...
598
  	tlb_gather_mmu(&tlb, mm, 0);
b6a2fea39   Ollie Wild   mm: variable leng...
599
600
601
602
  	if (new_end > old_start) {
  		/*
  		 * when the old and new regions overlap clear from new_end.
  		 */
d16dfc550   Peter Zijlstra   mm: mmu_gather re...
603
  		free_pgd_range(&tlb, new_end, old_end, new_end,
b6a2fea39   Ollie Wild   mm: variable leng...
604
605
606
607
608
609
610
611
  			vma->vm_next ? vma->vm_next->vm_start : 0);
  	} else {
  		/*
  		 * otherwise, clean from old_start; this is done to not touch
  		 * the address space in [new_end, old_start) some architectures
  		 * have constraints on va-space that make this illegal (IA64) -
  		 * for the others its just a little faster.
  		 */
d16dfc550   Peter Zijlstra   mm: mmu_gather re...
612
  		free_pgd_range(&tlb, old_start, old_end, new_end,
b6a2fea39   Ollie Wild   mm: variable leng...
613
  			vma->vm_next ? vma->vm_next->vm_start : 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
614
  	}
d16dfc550   Peter Zijlstra   mm: mmu_gather re...
615
  	tlb_finish_mmu(&tlb, new_end, old_end);
b6a2fea39   Ollie Wild   mm: variable leng...
616
617
  
  	/*
5beb49305   Rik van Riel   mm: change anon_v...
618
  	 * Shrink the vma to just the new range.  Always succeeds.
b6a2fea39   Ollie Wild   mm: variable leng...
619
620
621
622
  	 */
  	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
  
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
623
  }
b6a2fea39   Ollie Wild   mm: variable leng...
624
625
626
627
  /*
   * Finalizes the stack vm_area_struct. The flags and permissions are updated,
   * the stack is optionally relocated, and some extra space is added.
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
628
629
630
631
  int setup_arg_pages(struct linux_binprm *bprm,
  		    unsigned long stack_top,
  		    int executable_stack)
  {
b6a2fea39   Ollie Wild   mm: variable leng...
632
633
  	unsigned long ret;
  	unsigned long stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
634
  	struct mm_struct *mm = current->mm;
b6a2fea39   Ollie Wild   mm: variable leng...
635
636
637
638
  	struct vm_area_struct *vma = bprm->vma;
  	struct vm_area_struct *prev = NULL;
  	unsigned long vm_flags;
  	unsigned long stack_base;
803bf5ec2   Michael Neuling   fs/exec.c: restri...
639
640
641
  	unsigned long stack_size;
  	unsigned long stack_expand;
  	unsigned long rlim_stack;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
642
643
  
  #ifdef CONFIG_STACK_GROWSUP
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
644
  	/* Limit stack size to 1GB */
d554ed895   Jiri Slaby   fs: use rlimit he...
645
  	stack_base = rlimit_max(RLIMIT_STACK);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
646
647
  	if (stack_base > (1 << 30))
  		stack_base = 1 << 30;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
648

b6a2fea39   Ollie Wild   mm: variable leng...
649
650
651
  	/* Make sure we didn't let the argument array grow too large. */
  	if (vma->vm_end - vma->vm_start > stack_base)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
652

b6a2fea39   Ollie Wild   mm: variable leng...
653
  	stack_base = PAGE_ALIGN(stack_top - stack_base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
654

b6a2fea39   Ollie Wild   mm: variable leng...
655
656
657
  	stack_shift = vma->vm_start - stack_base;
  	mm->arg_start = bprm->p - stack_shift;
  	bprm->p = vma->vm_end - stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
658
  #else
b6a2fea39   Ollie Wild   mm: variable leng...
659
660
  	stack_top = arch_align_stack(stack_top);
  	stack_top = PAGE_ALIGN(stack_top);
1b528181b   Roland McGrath   setup_arg_pages: ...
661
662
663
664
  
  	if (unlikely(stack_top < mmap_min_addr) ||
  	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
  		return -ENOMEM;
b6a2fea39   Ollie Wild   mm: variable leng...
665
666
667
  	stack_shift = vma->vm_end - stack_top;
  
  	bprm->p -= stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
668
  	mm->arg_start = bprm->p;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
669
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
670
  	if (bprm->loader)
b6a2fea39   Ollie Wild   mm: variable leng...
671
672
  		bprm->loader -= stack_shift;
  	bprm->exec -= stack_shift;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
673

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
674
  	down_write(&mm->mmap_sem);
96a8e13ed   Hugh Dickins   exec: fix stack e...
675
  	vm_flags = VM_STACK_FLAGS;
b6a2fea39   Ollie Wild   mm: variable leng...
676
677
678
679
680
681
682
683
684
685
686
  
  	/*
  	 * Adjust stack execute permissions; explicitly enable for
  	 * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
  	 * (arch default) otherwise.
  	 */
  	if (unlikely(executable_stack == EXSTACK_ENABLE_X))
  		vm_flags |= VM_EXEC;
  	else if (executable_stack == EXSTACK_DISABLE_X)
  		vm_flags &= ~VM_EXEC;
  	vm_flags |= mm->def_flags;
a8bef8ff6   Mel Gorman   mm: migration: av...
687
  	vm_flags |= VM_STACK_INCOMPLETE_SETUP;
b6a2fea39   Ollie Wild   mm: variable leng...
688
689
690
691
692
693
694
695
696
697
  
  	ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
  			vm_flags);
  	if (ret)
  		goto out_unlock;
  	BUG_ON(prev != vma);
  
  	/* Move stack pages down in memory. */
  	if (stack_shift) {
  		ret = shift_arg_pages(vma, stack_shift);
fc63cf237   Anton Blanchard   exec: setup_arg_p...
698
699
  		if (ret)
  			goto out_unlock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
700
  	}
a8bef8ff6   Mel Gorman   mm: migration: av...
701
702
  	/* mprotect_fixup is overkill to remove the temporary stack flags */
  	vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;
5ef097dd7   Michael Neuling   exec: create init...
703
  	stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
803bf5ec2   Michael Neuling   fs/exec.c: restri...
704
705
706
707
708
709
  	stack_size = vma->vm_end - vma->vm_start;
  	/*
  	 * Align this down to a page boundary as expand_stack
  	 * will align it up.
  	 */
  	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
b6a2fea39   Ollie Wild   mm: variable leng...
710
  #ifdef CONFIG_STACK_GROWSUP
803bf5ec2   Michael Neuling   fs/exec.c: restri...
711
712
713
714
  	if (stack_size + stack_expand > rlim_stack)
  		stack_base = vma->vm_start + rlim_stack;
  	else
  		stack_base = vma->vm_end + stack_expand;
b6a2fea39   Ollie Wild   mm: variable leng...
715
  #else
803bf5ec2   Michael Neuling   fs/exec.c: restri...
716
717
718
719
  	if (stack_size + stack_expand > rlim_stack)
  		stack_base = vma->vm_end - rlim_stack;
  	else
  		stack_base = vma->vm_start - stack_expand;
b6a2fea39   Ollie Wild   mm: variable leng...
720
  #endif
3af9e8592   Eric B Munson   perf: Add non-exe...
721
  	current->mm->start_stack = bprm->p;
b6a2fea39   Ollie Wild   mm: variable leng...
722
723
724
725
726
  	ret = expand_stack(vma, stack_base);
  	if (ret)
  		ret = -EFAULT;
  
  out_unlock:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
  	up_write(&mm->mmap_sem);
fc63cf237   Anton Blanchard   exec: setup_arg_p...
728
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
729
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
730
  EXPORT_SYMBOL(setup_arg_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
731
732
733
734
  #endif /* CONFIG_MMU */
  
  struct file *open_exec(const char *name)
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
735
  	struct file *file;
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
736
  	int err;
47c805dc2   Al Viro   switch do_filp_op...
737
738
739
740
741
  	static const struct open_flags open_exec_flags = {
  		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
  		.acc_mode = MAY_EXEC | MAY_OPEN,
  		.intent = LOOKUP_OPEN
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
742

47c805dc2   Al Viro   switch do_filp_op...
743
  	file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
6e8341a11   Al Viro   Switch open_exec(...
744
  	if (IS_ERR(file))
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
745
746
747
  		goto out;
  
  	err = -EACCES;
6e8341a11   Al Viro   Switch open_exec(...
748
749
  	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
  		goto exit;
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
750

6e8341a11   Al Viro   Switch open_exec(...
751
752
  	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
  		goto exit;
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
753

2a12a9d78   Eric Paris   fsnotify: pass a ...
754
  	fsnotify_open(file);
6110e3abb   Eric Paris   sys_execve and sy...
755

e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
756
  	err = deny_write_access(file);
6e8341a11   Al Viro   Switch open_exec(...
757
758
  	if (err)
  		goto exit;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
759

6e8341a11   Al Viro   Switch open_exec(...
760
  out:
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
761
  	return file;
6e8341a11   Al Viro   Switch open_exec(...
762
763
  exit:
  	fput(file);
e56b6a5dd   Christoph Hellwig   Re: [PATCH 3/6] v...
764
765
  	return ERR_PTR(err);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
766
  EXPORT_SYMBOL(open_exec);
6777d773a   Mimi Zohar   kernel_read: rede...
767
768
  /*
   * Read up to @count bytes from @file, starting at @offset, into the
   * kernel buffer @addr.  Returns whatever vfs_read() returned.
   */
  int kernel_read(struct file *file, loff_t offset,
  		char *addr, unsigned long count)
  {
  	mm_segment_t saved_fs = get_fs();
  	loff_t pos = offset;
  	int nread;
  
  	/*
  	 * vfs_read() wants a __user pointer; switching the address limit
  	 * with set_fs() is what makes the cast below legitimate.
  	 */
  	set_fs(get_ds());
  	nread = vfs_read(file, (void __user *)addr, count, &pos);
  	set_fs(saved_fs);
  
  	return nread;
  }
  
  EXPORT_SYMBOL(kernel_read);
  
  static int exec_mmap(struct mm_struct *mm)
  {
  	struct task_struct *tsk;
  	struct mm_struct * old_mm, *active_mm;
  
  	/* Notify parent that we're no longer interested in the old VM */
  	tsk = current;
  	old_mm = current->mm;
34e55232e   KAMEZAWA Hiroyuki   mm: avoid false s...
792
  	sync_mm_rss(tsk, old_mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
793
794
795
796
797
798
799
  	mm_release(tsk, old_mm);
  
  	if (old_mm) {
  		/*
  		 * Make sure that if there is a core dump in progress
  		 * for the old mm, we get out and die instead of going
  		 * through with the exec.  We must hold mmap_sem around
999d9fc16   Oleg Nesterov   coredump: move mm...
800
  		 * checking core_state and changing tsk->mm.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
801
802
  		 */
  		down_read(&old_mm->mmap_sem);
999d9fc16   Oleg Nesterov   coredump: move mm...
803
  		if (unlikely(old_mm->core_state)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
804
805
806
807
808
809
810
811
812
  			up_read(&old_mm->mmap_sem);
  			return -EINTR;
  		}
  	}
  	task_lock(tsk);
  	active_mm = tsk->active_mm;
  	tsk->mm = mm;
  	tsk->active_mm = mm;
  	activate_mm(active_mm, mm);
3d5992d2a   Ying Han   oom: add per-mm o...
813
814
815
816
  	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
  		atomic_dec(&old_mm->oom_disable_count);
  		atomic_inc(&tsk->mm->oom_disable_count);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
817
818
819
820
  	task_unlock(tsk);
  	arch_pick_mmap_layout(mm);
  	if (old_mm) {
  		up_read(&old_mm->mmap_sem);
7dddb12c6   Eric Sesterhenn   BUG_ON() Conversi...
821
  		BUG_ON(active_mm != old_mm);
31a78f23b   Balbir Singh   mm owner: fix rac...
822
  		mm_update_next_owner(old_mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
824
825
826
827
828
829
830
831
832
833
834
835
  		mmput(old_mm);
  		return 0;
  	}
  	mmdrop(active_mm);
  	return 0;
  }
  
  /*
   * This function makes sure the current process has its own signal table,
   * so that flush_signal_handlers can later reset the handlers without
   * disturbing other processes.  (Other processes might share the signal
   * table via the CLONE_SIGHAND option to clone().)
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
836
  static int de_thread(struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
837
838
  {
  	struct signal_struct *sig = tsk->signal;
b2c903b87   Oleg Nesterov   exec: simplify th...
839
  	struct sighand_struct *oldsighand = tsk->sighand;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
840
  	spinlock_t *lock = &oldsighand->siglock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
841

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
842
  	if (thread_group_empty(tsk))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
843
844
845
846
  		goto no_thread_group;
  
  	/*
  	 * Kill all other threads in the thread group.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
847
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
848
  	spin_lock_irq(lock);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
849
  	if (signal_group_exit(sig)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
850
851
852
853
854
  		/*
  		 * Another group action in progress, just
  		 * return so that the signal is processed.
  		 */
  		spin_unlock_irq(lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
855
856
  		return -EAGAIN;
  	}
d344193a0   Oleg Nesterov   exit: avoid sig->...
857

ed5d2cac1   Oleg Nesterov   exec: rework the ...
858
  	sig->group_exit_task = tsk;
d344193a0   Oleg Nesterov   exit: avoid sig->...
859
860
861
  	sig->notify_count = zap_other_threads(tsk);
  	if (!thread_group_leader(tsk))
  		sig->notify_count--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
862

d344193a0   Oleg Nesterov   exit: avoid sig->...
863
  	while (sig->notify_count) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
864
865
866
867
868
  		__set_current_state(TASK_UNINTERRUPTIBLE);
  		spin_unlock_irq(lock);
  		schedule();
  		spin_lock_irq(lock);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
869
870
871
872
873
874
875
  	spin_unlock_irq(lock);
  
  	/*
  	 * At this point all other threads have exited, all we have to
  	 * do is to wait for the thread group leader to become inactive,
  	 * and to assume its PID:
  	 */
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
876
  	if (!thread_group_leader(tsk)) {
8187926bd   Oleg Nesterov   posix-timers: sim...
877
  		struct task_struct *leader = tsk->group_leader;
6db840fa7   Oleg Nesterov   exec: RT sub-thre...
878

2800d8d19   Oleg Nesterov   document de_threa...
879
  		sig->notify_count = -1;	/* for exit_notify() */
6db840fa7   Oleg Nesterov   exec: RT sub-thre...
880
881
882
883
884
885
886
887
  		for (;;) {
  			write_lock_irq(&tasklist_lock);
  			if (likely(leader->exit_state))
  				break;
  			__set_current_state(TASK_UNINTERRUPTIBLE);
  			write_unlock_irq(&tasklist_lock);
  			schedule();
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
888

f5e902817   Roland McGrath   [PATCH] process a...
889
890
891
892
893
894
895
896
897
898
  		/*
  		 * The only record we have of the real-time age of a
  		 * process, regardless of execs it's done, is start_time.
  		 * All the past CPU time is accumulated in signal_struct
  		 * from sister threads now dead.  But in this non-leader
  		 * exec, nothing survives from the original leader thread,
  		 * whose birth marks the true age of this process now.
  		 * When we take on its identity by switching to its PID, we
  		 * also take its birthdate (always earlier than our own).
  		 */
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
899
  		tsk->start_time = leader->start_time;
f5e902817   Roland McGrath   [PATCH] process a...
900

bac0abd61   Pavel Emelyanov   Isolate some expl...
901
902
  		BUG_ON(!same_thread_group(leader, tsk));
  		BUG_ON(has_group_leader_pid(tsk));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
903
904
905
906
907
908
  		/*
  		 * An exec() starts a new thread group with the
  		 * TGID of the previous thread group. Rehash the
  		 * two threads with a switched PID, and release
  		 * the former thread group leader:
  		 */
d73d65293   Eric W. Biederman   [PATCH] pidhash: ...
909
910
  
  		/* Become a process group leader with the old leader's pid.
c18258c6f   Eric W. Biederman   [PATCH] pid: Impl...
911
912
  		 * The old leader becomes a thread of the this thread group.
  		 * Note: The old leader also uses this pid until release_task
d73d65293   Eric W. Biederman   [PATCH] pidhash: ...
913
914
  		 *       is called.  Odd but simple and correct.
  		 */
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
915
916
  		detach_pid(tsk, PIDTYPE_PID);
  		tsk->pid = leader->pid;
3743ca05f   Sukadev Bhattiprolu   pid namespaces: u...
917
  		attach_pid(tsk, PIDTYPE_PID,  task_pid(leader));
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
918
919
  		transfer_pid(leader, tsk, PIDTYPE_PGID);
  		transfer_pid(leader, tsk, PIDTYPE_SID);
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
920

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
921
  		list_replace_rcu(&leader->tasks, &tsk->tasks);
9cd80bbb0   Oleg Nesterov   do_wait() optimiz...
922
  		list_replace_init(&leader->sibling, &tsk->sibling);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
923

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
924
925
  		tsk->group_leader = tsk;
  		leader->group_leader = tsk;
de12a7878   Eric W. Biederman   [PATCH] de_thread...
926

aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
927
  		tsk->exit_signal = SIGCHLD;
087806b12   Oleg Nesterov   redefine thread_g...
928
  		leader->exit_signal = -1;
962b564cf   Oleg Nesterov   [PATCH] fix do_wa...
929
930
931
  
  		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
  		leader->exit_state = EXIT_DEAD;
eac1b5e57   Oleg Nesterov   ptrace: do_wait(t...
932
933
934
935
936
937
938
939
  
  		/*
  		 * We are going to release_task()->ptrace_unlink() silently,
  		 * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
  		 * the tracer wont't block again waiting for this thread.
  		 */
  		if (unlikely(leader->ptrace))
  			__wake_up_parent(leader, leader->parent);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
940
  		write_unlock_irq(&tasklist_lock);
8187926bd   Oleg Nesterov   posix-timers: sim...
941
942
  
  		release_task(leader);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
943
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
944

6db840fa7   Oleg Nesterov   exec: RT sub-thre...
945
946
  	sig->group_exit_task = NULL;
  	sig->notify_count = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
947
948
  
  no_thread_group:
1f10206cf   Jiri Pirko   getrusage: fill r...
949
950
  	if (current->mm)
  		setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
  	exit_itimers(sig);
cbaffba12   Oleg Nesterov   posix timers: dis...
952
  	flush_itimer_signals();
329f7dba5   Oleg Nesterov   [PATCH] fix de_th...
953

b2c903b87   Oleg Nesterov   exec: simplify th...
954
955
  	if (atomic_read(&oldsighand->count) != 1) {
  		struct sighand_struct *newsighand;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956
  		/*
b2c903b87   Oleg Nesterov   exec: simplify th...
957
958
  		 * This ->sighand is shared with the CLONE_SIGHAND
  		 * but not CLONE_THREAD task, switch to the new one.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959
  		 */
b2c903b87   Oleg Nesterov   exec: simplify th...
960
961
962
  		newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
  		if (!newsighand)
  			return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
963
964
965
966
967
968
  		atomic_set(&newsighand->count, 1);
  		memcpy(newsighand->action, oldsighand->action,
  		       sizeof(newsighand->action));
  
  		write_lock_irq(&tasklist_lock);
  		spin_lock(&oldsighand->siglock);
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
969
  		rcu_assign_pointer(tsk->sighand, newsighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
970
971
  		spin_unlock(&oldsighand->siglock);
  		write_unlock_irq(&tasklist_lock);
fba2afaae   Davide Libenzi   signal/timer/even...
972
  		__cleanup_sighand(oldsighand);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
973
  	}
aafe6c2a2   Eric W. Biederman   [PATCH] de_thread...
974
  	BUG_ON(!thread_group_leader(tsk));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
976
  	return 0;
  }
0840a90d9   Oleg Nesterov   exec: simplify ->...
977

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
978
979
980
981
  /*
   * These functions flush out all traces of the currently running executable
   * so that a new one can be started
   */
858119e15   Arjan van de Ven   [PATCH] Unlinline...
982
  static void flush_old_files(struct files_struct * files)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
983
984
  {
  	long j = -1;
badf16621   Dipankar Sarma   [PATCH] files: br...
985
  	struct fdtable *fdt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
986
987
988
989
990
991
992
  
  	spin_lock(&files->file_lock);
  	for (;;) {
  		unsigned long set, i;
  
  		j++;
  		i = j * __NFDBITS;
badf16621   Dipankar Sarma   [PATCH] files: br...
993
  		fdt = files_fdtable(files);
bbea9f696   Vadim Lobanov   [PATCH] fdtable: ...
994
  		if (i >= fdt->max_fds)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
  			break;
badf16621   Dipankar Sarma   [PATCH] files: br...
996
  		set = fdt->close_on_exec->fds_bits[j];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
997
998
  		if (!set)
  			continue;
badf16621   Dipankar Sarma   [PATCH] files: br...
999
  		fdt->close_on_exec->fds_bits[j] = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
  		spin_unlock(&files->file_lock);
  		for ( ; set ; i++,set >>= 1) {
  			if (set & 1) {
  				sys_close(i);
  			}
  		}
  		spin_lock(&files->file_lock);
  
  	}
  	spin_unlock(&files->file_lock);
  }
59714d65d   Andrew Morton   get_task_comm(): ...
1011
  char *get_task_comm(char *buf, struct task_struct *tsk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1012
1013
1014
1015
1016
  {
  	/* buf must be at least sizeof(tsk->comm) in size */
  	task_lock(tsk);
  	strncpy(buf, tsk->comm, sizeof(tsk->comm));
  	task_unlock(tsk);
59714d65d   Andrew Morton   get_task_comm(): ...
1017
  	return buf;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1018
  }
7d74f492e   J Freyensee   export kernel cal...
1019
  EXPORT_SYMBOL_GPL(get_task_comm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1020
1021
1022
1023
  
  void set_task_comm(struct task_struct *tsk, char *buf)
  {
  	task_lock(tsk);
4614a696b   John Stultz   procfs: allow thr...
1024
1025
1026
1027
1028
1029
1030
1031
1032
  
  	/*
  	 * Threads may access current->comm without holding
  	 * the task lock, so write the string carefully.
  	 * Readers without a lock may see incomplete new
  	 * names but are safe from non-terminating string reads.
  	 */
  	memset(tsk->comm, 0, TASK_COMM_LEN);
  	wmb();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1033
1034
  	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
  	task_unlock(tsk);
cdd6c482c   Ingo Molnar   perf: Do the big ...
1035
  	perf_event_comm(tsk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1036
1037
1038
1039
  }
  
  int flush_old_exec(struct linux_binprm * bprm)
  {
221af7f87   Linus Torvalds   Split 'flush_old_...
1040
  	int retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1041
1042
1043
1044
1045
1046
1047
1048
  
  	/*
  	 * Make sure we have a private signal table and that
  	 * we are unassociated from the previous thread group.
  	 */
  	retval = de_thread(current);
  	if (retval)
  		goto out;
925d1c401   Matt Helsley   procfs task exe s...
1049
  	set_mm_exe_file(bprm->mm, bprm->file);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1050
  	/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1051
1052
  	 * Release all of the old mmap stuff
  	 */
3c77f8457   Oleg Nesterov   exec: make argv/e...
1053
  	acct_arg_size(bprm, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1054
1055
  	retval = exec_mmap(bprm->mm);
  	if (retval)
fd8328be8   Al Viro   [PATCH] sanitize ...
1056
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
1058
  
  	bprm->mm = NULL;		/* We're using it now */
7ab02af42   Linus Torvalds   Fix 'flush_old_ex...
1059

dac853ae8   Mathias Krause   exec: delay addre...
1060
  	set_fs(USER_DS);
98391cf4d   KOSAKI Motohiro   exec: don't turn ...
1061
  	current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
7ab02af42   Linus Torvalds   Fix 'flush_old_ex...
1062
1063
  	flush_thread();
  	current->personality &= ~bprm->per_clear;
221af7f87   Linus Torvalds   Split 'flush_old_...
1064
1065
1066
1067
1068
1069
  	return 0;
  
  out:
  	return retval;
  }
  EXPORT_SYMBOL(flush_old_exec);
1b5d783c9   Al Viro   consolidate BINPR...
1070
1071
1072
1073
1074
1075
  /*
   * Set BINPRM_FLAGS_ENFORCE_NONDUMP in @bprm when inode_permission()
   * refuses MAY_READ on @file's inode.
   */
  void would_dump(struct linux_binprm *bprm, struct file *file)
  {
  	struct inode *inode = file->f_path.dentry->d_inode;
  
  	if (inode_permission(inode, MAY_READ) >= 0)
  		return;
  
  	bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
  }
  EXPORT_SYMBOL(would_dump);
221af7f87   Linus Torvalds   Split 'flush_old_...
1076
1077
1078
  void setup_new_exec(struct linux_binprm * bprm)
  {
  	int i, ch;
d7627467b   David Howells   Make do_execve() ...
1079
  	const char *name;
221af7f87   Linus Torvalds   Split 'flush_old_...
1080
1081
1082
  	char tcomm[sizeof(current->comm)];
  
  	arch_pick_mmap_layout(current->mm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1083
1084
  
  	/* This is the point of no return */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1085
  	current->sas_ss_sp = current->sas_ss_size = 0;
da9592ede   David Howells   CRED: Wrap task c...
1086
  	if (current_euid() == current_uid() && current_egid() == current_gid())
6c5d52382   Kawai, Hidehiro   coredump masking:...
1087
  		set_dumpable(current->mm, 1);
d6e711448   Alan Cox   [PATCH] setuid co...
1088
  	else
6c5d52382   Kawai, Hidehiro   coredump masking:...
1089
  		set_dumpable(current->mm, suid_dumpable);
d6e711448   Alan Cox   [PATCH] setuid co...
1090

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1091
  	name = bprm->filename;
367720923   Paolo 'Blaisorblade' Giarrusso   [PATCH] comments ...
1092
1093
  
  	/* Copies the binary name from after last slash */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1094
1095
  	for (i=0; (ch = *(name++)) != '\0';) {
  		if (ch == '/')
367720923   Paolo 'Blaisorblade' Giarrusso   [PATCH] comments ...
1096
  			i = 0; /* overwrite what we wrote */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1097
1098
1099
1100
1101
1102
  		else
  			if (i < (sizeof(tcomm) - 1))
  				tcomm[i++] = ch;
  	}
  	tcomm[i] = '\0';
  	set_task_comm(current, tcomm);
0551fbd29   Benjamin Herrenschmidt   [PATCH] Add mm->t...
1103
1104
1105
1106
1107
  	/* Set the new mm task size. We have to do that late because it may
  	 * depend on TIF_32BIT which is only updated in flush_thread() on
  	 * some architectures like powerpc
  	 */
  	current->mm->task_size = TASK_SIZE;
a6f76f23d   David Howells   CRED: Make execve...
1108
1109
1110
  	/* install the new credentials */
  	if (bprm->cred->uid != current_euid() ||
  	    bprm->cred->gid != current_egid()) {
d2d56c5f5   Marcel Holtmann   Reset current->pd...
1111
  		current->pdeath_signal = 0;
1b5d783c9   Al Viro   consolidate BINPR...
1112
1113
1114
1115
  	} else {
  		would_dump(bprm, bprm->file);
  		if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
  			set_dumpable(current->mm, suid_dumpable);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1116
  	}
f65cb45cb   Ingo Molnar   perfcounters: flu...
1117
1118
1119
1120
1121
  	/*
  	 * Flush performance counters when crossing a
  	 * security domain:
  	 */
  	if (!get_dumpable(current->mm))
cdd6c482c   Ingo Molnar   perf: Do the big ...
1122
  		perf_event_exit_task(current);
f65cb45cb   Ingo Molnar   perfcounters: flu...
1123

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1124
1125
1126
1127
1128
1129
1130
  	/* An exec changes our domain. We are no longer part of the thread
  	   group */
  
  	current->self_exec_id++;
  			
  	flush_signal_handlers(current, 0);
  	flush_old_files(current->files);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1131
  }
221af7f87   Linus Torvalds   Split 'flush_old_...
1132
  EXPORT_SYMBOL(setup_new_exec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1133

a6f76f23d   David Howells   CRED: Make execve...
1134
  /*
a2a8474c3   Oleg Nesterov   exec: do not slee...
1135
1136
1137
1138
1139
1140
1141
   * Prepare credentials and lock ->cred_guard_mutex.
   * install_exec_creds() commits the new creds and drops the lock.
   * Or, if exec fails before, free_bprm() should release ->cred
   * and unlock.
   */
  int prepare_bprm_creds(struct linux_binprm *bprm)
  {
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1142
  	if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
a2a8474c3   Oleg Nesterov   exec: do not slee...
1143
1144
1145
1146
1147
  		return -ERESTARTNOINTR;
  
  	bprm->cred = prepare_exec_creds();
  	if (likely(bprm->cred))
  		return 0;
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1148
  	mutex_unlock(&current->signal->cred_guard_mutex);
a2a8474c3   Oleg Nesterov   exec: do not slee...
1149
1150
1151
1152
1153
1154
1155
  	return -ENOMEM;
  }
  
  void free_bprm(struct linux_binprm *bprm)
  {
  	free_arg_pages(bprm);
  	if (bprm->cred) {
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1156
  		mutex_unlock(&current->signal->cred_guard_mutex);
a2a8474c3   Oleg Nesterov   exec: do not slee...
1157
1158
1159
1160
1161
1162
  		abort_creds(bprm->cred);
  	}
  	kfree(bprm);
  }
  
  /*
a6f76f23d   David Howells   CRED: Make execve...
1163
1164
1165
1166
1167
1168
1169
1170
   * install the new credentials for this executable
   */
  void install_exec_creds(struct linux_binprm *bprm)
  {
  	security_bprm_committing_creds(bprm);
  
  	commit_creds(bprm->cred);
  	bprm->cred = NULL;
a2a8474c3   Oleg Nesterov   exec: do not slee...
1171
1172
  	/*
  	 * cred_guard_mutex must be held at least to this point to prevent
a6f76f23d   David Howells   CRED: Make execve...
1173
  	 * ptrace_attach() from altering our determination of the task's
a2a8474c3   Oleg Nesterov   exec: do not slee...
1174
1175
  	 * credentials; any time after this it may be unlocked.
  	 */
a6f76f23d   David Howells   CRED: Make execve...
1176
  	security_bprm_committed_creds(bprm);
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1177
  	mutex_unlock(&current->signal->cred_guard_mutex);
a6f76f23d   David Howells   CRED: Make execve...
1178
1179
1180
1181
1182
  }
  EXPORT_SYMBOL(install_exec_creds);
  
  /*
   * determine how safe it is to execute the proposed program
9b1bf12d5   KOSAKI Motohiro   signals: move cre...
1183
   * - the caller must hold ->cred_guard_mutex to protect against
a6f76f23d   David Howells   CRED: Make execve...
1184
1185
   *   PTRACE_ATTACH
   */
498052bba   Al Viro   New locking/refco...
1186
  int check_unsafe_exec(struct linux_binprm *bprm)
a6f76f23d   David Howells   CRED: Make execve...
1187
  {
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1188
  	struct task_struct *p = current, *t;
f1191b50e   Al Viro   check_unsafe_exec...
1189
  	unsigned n_fs;
498052bba   Al Viro   New locking/refco...
1190
  	int res = 0;
a6f76f23d   David Howells   CRED: Make execve...
1191

4b9d33e6d   Tejun Heo   ptrace: kill clon...
1192
1193
1194
1195
1196
1197
  	if (p->ptrace) {
  		if (p->ptrace & PT_PTRACE_CAP)
  			bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
  		else
  			bprm->unsafe |= LSM_UNSAFE_PTRACE;
  	}
a6f76f23d   David Howells   CRED: Make execve...
1198

0bf2f3aec   David Howells   CRED: Fix SUID ex...
1199
  	n_fs = 1;
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1200
  	spin_lock(&p->fs->lock);
437f7fdb6   Oleg Nesterov   check_unsafe_exec...
1201
  	rcu_read_lock();
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1202
1203
1204
  	for (t = next_thread(p); t != p; t = next_thread(t)) {
  		if (t->fs == p->fs)
  			n_fs++;
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1205
  	}
437f7fdb6   Oleg Nesterov   check_unsafe_exec...
1206
  	rcu_read_unlock();
0bf2f3aec   David Howells   CRED: Fix SUID ex...
1207

f1191b50e   Al Viro   check_unsafe_exec...
1208
  	if (p->fs->users > n_fs) {
a6f76f23d   David Howells   CRED: Make execve...
1209
  		bprm->unsafe |= LSM_UNSAFE_SHARE;
498052bba   Al Viro   New locking/refco...
1210
  	} else {
8c652f96d   Oleg Nesterov   do_execve() must ...
1211
1212
1213
1214
1215
  		res = -EAGAIN;
  		if (!p->fs->in_exec) {
  			p->fs->in_exec = 1;
  			res = 1;
  		}
498052bba   Al Viro   New locking/refco...
1216
  	}
2a4419b5b   Nick Piggin   fs: fs_struct rwl...
1217
  	spin_unlock(&p->fs->lock);
498052bba   Al Viro   New locking/refco...
1218
1219
  
  	return res;
a6f76f23d   David Howells   CRED: Make execve...
1220
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1221
1222
1223
  /* 
   * Fill the binprm structure from the inode. 
   * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
a6f76f23d   David Howells   CRED: Make execve...
1224
1225
   *
   * This may be called multiple times for binary chains (scripts for example).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1226
1227
1228
   */
  int prepare_binprm(struct linux_binprm *bprm)
  {
a6f76f23d   David Howells   CRED: Make execve...
1229
  	umode_t mode;
0f7fc9e4d   Josef "Jeff" Sipek   [PATCH] VFS: chan...
1230
  	struct inode * inode = bprm->file->f_path.dentry->d_inode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1231
1232
1233
  	int retval;
  
  	mode = inode->i_mode;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1234
1235
  	if (bprm->file->f_op == NULL)
  		return -EACCES;
a6f76f23d   David Howells   CRED: Make execve...
1236
1237
1238
  	/* clear any previous set[ug]id data from a previous binary */
  	bprm->cred->euid = current_euid();
  	bprm->cred->egid = current_egid();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1239

a6f76f23d   David Howells   CRED: Make execve...
1240
  	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1241
1242
  		/* Set-uid? */
  		if (mode & S_ISUID) {
a6f76f23d   David Howells   CRED: Make execve...
1243
1244
  			bprm->per_clear |= PER_CLEAR_ON_SETID;
  			bprm->cred->euid = inode->i_uid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1245
1246
1247
1248
1249
1250
1251
1252
1253
  		}
  
  		/* Set-gid? */
  		/*
  		 * If setgid is set but no group execute bit then this
  		 * is a candidate for mandatory locking, not a setgid
  		 * executable.
  		 */
  		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
a6f76f23d   David Howells   CRED: Make execve...
1254
1255
  			bprm->per_clear |= PER_CLEAR_ON_SETID;
  			bprm->cred->egid = inode->i_gid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1256
1257
1258
1259
  		}
  	}
  
  	/* fill in binprm security blob */
a6f76f23d   David Howells   CRED: Make execve...
1260
  	retval = security_bprm_set_creds(bprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1261
1262
  	if (retval)
  		return retval;
a6f76f23d   David Howells   CRED: Make execve...
1263
  	bprm->cred_prepared = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1264

a6f76f23d   David Howells   CRED: Make execve...
1265
1266
  	memset(bprm->buf, 0, BINPRM_BUF_SIZE);
  	return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1267
1268
1269
  }
  
  EXPORT_SYMBOL(prepare_binprm);
4fc75ff48   Nick Piggin   exec: fix remove_...
1270
1271
1272
1273
1274
  /*
   * Arguments are '\0' separated strings found at the location bprm->p
   * points to; chop off the first by relocating bprm->p to right after
   * the first '\0' encountered.
   */
b6a2fea39   Ollie Wild   mm: variable leng...
1275
  int remove_arg_zero(struct linux_binprm *bprm)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1276
  {
b6a2fea39   Ollie Wild   mm: variable leng...
1277
1278
1279
1280
  	int ret = 0;
  	unsigned long offset;
  	char *kaddr;
  	struct page *page;
4fc75ff48   Nick Piggin   exec: fix remove_...
1281

b6a2fea39   Ollie Wild   mm: variable leng...
1282
1283
  	if (!bprm->argc)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1284

b6a2fea39   Ollie Wild   mm: variable leng...
1285
1286
1287
1288
1289
1290
1291
1292
  	do {
  		offset = bprm->p & ~PAGE_MASK;
  		page = get_arg_page(bprm, bprm->p, 0);
  		if (!page) {
  			ret = -EFAULT;
  			goto out;
  		}
  		kaddr = kmap_atomic(page, KM_USER0);
4fc75ff48   Nick Piggin   exec: fix remove_...
1293

b6a2fea39   Ollie Wild   mm: variable leng...
1294
1295
1296
  		for (; offset < PAGE_SIZE && kaddr[offset];
  				offset++, bprm->p++)
  			;
4fc75ff48   Nick Piggin   exec: fix remove_...
1297

b6a2fea39   Ollie Wild   mm: variable leng...
1298
1299
  		kunmap_atomic(kaddr, KM_USER0);
  		put_arg_page(page);
4fc75ff48   Nick Piggin   exec: fix remove_...
1300

b6a2fea39   Ollie Wild   mm: variable leng...
1301
1302
1303
  		if (offset == PAGE_SIZE)
  			free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
  	} while (offset == PAGE_SIZE);
4fc75ff48   Nick Piggin   exec: fix remove_...
1304

b6a2fea39   Ollie Wild   mm: variable leng...
1305
1306
1307
  	bprm->p++;
  	bprm->argc--;
  	ret = 0;
4fc75ff48   Nick Piggin   exec: fix remove_...
1308

b6a2fea39   Ollie Wild   mm: variable leng...
1309
1310
  out:
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1311
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1312
1313
1314
1315
1316
1317
1318
  EXPORT_SYMBOL(remove_arg_zero);
  
  /*
   * cycle through the list of binary format handlers until one recognizes the image
   */
  int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
  {
85f334666   Roland McGrath   tracehook: exec d...
1319
  	unsigned int depth = bprm->recursion_depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1320
1321
  	int try,retval;
  	struct linux_binfmt *fmt;
bb188d7e6   Denys Vlasenko   ptrace: make form...
1322
  	pid_t old_pid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1323

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1324
1325
1326
  	retval = security_bprm_check(bprm);
  	if (retval)
  		return retval;
473ae30bc   Al Viro   [PATCH] execve ar...
1327
1328
1329
  	retval = audit_bprm(bprm);
  	if (retval)
  		return retval;
bb188d7e6   Denys Vlasenko   ptrace: make form...
1330
1331
1332
1333
  	/* Need to fetch pid before load_binary changes it */
  	rcu_read_lock();
  	old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
  	rcu_read_unlock();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1334
1335
1336
  	retval = -ENOENT;
  	for (try=0; try<2; try++) {
  		read_lock(&binfmt_lock);
e4dc1b14d   Alexey Dobriyan   Use list_head in ...
1337
  		list_for_each_entry(fmt, &formats, lh) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1338
1339
1340
1341
1342
1343
1344
  			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
  			if (!fn)
  				continue;
  			if (!try_module_get(fmt->module))
  				continue;
  			read_unlock(&binfmt_lock);
  			retval = fn(bprm, regs);
85f334666   Roland McGrath   tracehook: exec d...
1345
1346
1347
1348
1349
1350
  			/*
  			 * Restore the depth counter to its starting value
  			 * in this call, so we don't have to rely on every
  			 * load_binary function to restore it on return.
  			 */
  			bprm->recursion_depth = depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1351
  			if (retval >= 0) {
85f334666   Roland McGrath   tracehook: exec d...
1352
  				if (depth == 0)
bb188d7e6   Denys Vlasenko   ptrace: make form...
1353
1354
  					ptrace_event(PTRACE_EVENT_EXEC,
  							old_pid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1355
1356
1357
1358
1359
1360
  				put_binfmt(fmt);
  				allow_write_access(bprm->file);
  				if (bprm->file)
  					fput(bprm->file);
  				bprm->file = NULL;
  				current->did_exec = 1;
9f46080c4   Matt Helsley   [PATCH] Process E...
1361
  				proc_exec_connector(current);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
  				return retval;
  			}
  			read_lock(&binfmt_lock);
  			put_binfmt(fmt);
  			if (retval != -ENOEXEC || bprm->mm == NULL)
  				break;
  			if (!bprm->file) {
  				read_unlock(&binfmt_lock);
  				return retval;
  			}
  		}
  		read_unlock(&binfmt_lock);
b4edf8bd0   Tetsuo Handa   exec: do not retr...
1374
  #ifdef CONFIG_MODULES
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1375
1376
  		if (retval != -ENOEXEC || bprm->mm == NULL) {
  			break;
5f4123be3   Johannes Berg   remove CONFIG_KMO...
1377
  		} else {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1378
1379
1380
1381
1382
1383
1384
  #define printable(c) (((c)=='\t') || ((c)=='
  ') || (0x20<=(c) && (c)<=0x7e))
  			if (printable(bprm->buf[0]) &&
  			    printable(bprm->buf[1]) &&
  			    printable(bprm->buf[2]) &&
  			    printable(bprm->buf[3]))
  				break; /* -ENOEXEC */
912193521   Tetsuo Handa   exec: do not call...
1385
1386
  			if (try)
  				break; /* -ENOEXEC */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1387
  			request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1388
  		}
b4edf8bd0   Tetsuo Handa   exec: do not retr...
1389
1390
1391
  #else
  		break;
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1392
1393
1394
1395
1396
1397
1398
1399
1400
  	}
  	return retval;
  }
  
  EXPORT_SYMBOL(search_binary_handler);
  
  /*
   * sys_execve() executes a new program.
   */
ba2d01629   Oleg Nesterov   exec: introduce s...
1401
1402
1403
1404
  static int do_execve_common(const char *filename,
  				struct user_arg_ptr argv,
  				struct user_arg_ptr envp,
  				struct pt_regs *regs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1405
1406
1407
  {
  	struct linux_binprm *bprm;
  	struct file *file;
3b1253880   Al Viro   [PATCH] sanitize ...
1408
  	struct files_struct *displaced;
8c652f96d   Oleg Nesterov   do_execve() must ...
1409
  	bool clear_in_exec;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1410
  	int retval;
72fa59970   Vasiliy Kulikov   move RLIMIT_NPROC...
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
  	const struct cred *cred = current_cred();
  
  	/*
  	 * We move the actual failure in case of RLIMIT_NPROC excess from
  	 * set*uid() to execve() because too many poorly written programs
  	 * don't check setuid() return code.  Here we additionally recheck
  	 * whether NPROC limit is still exceeded.
  	 */
  	if ((current->flags & PF_NPROC_EXCEEDED) &&
  	    atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
  		retval = -EAGAIN;
  		goto out_ret;
  	}
  
  	/* We're below the limit (still or again), so we don't want to make
  	 * further execve() calls fail. */
  	current->flags &= ~PF_NPROC_EXCEEDED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1428

3b1253880   Al Viro   [PATCH] sanitize ...
1429
  	retval = unshare_files(&displaced);
fd8328be8   Al Viro   [PATCH] sanitize ...
1430
1431
  	if (retval)
  		goto out_ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1432
  	retval = -ENOMEM;
11b0b5abb   Oliver Neukum   [PATCH] use kzall...
1433
  	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1434
  	if (!bprm)
fd8328be8   Al Viro   [PATCH] sanitize ...
1435
  		goto out_files;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1436

a2a8474c3   Oleg Nesterov   exec: do not slee...
1437
1438
  	retval = prepare_bprm_creds(bprm);
  	if (retval)
a6f76f23d   David Howells   CRED: Make execve...
1439
  		goto out_free;
498052bba   Al Viro   New locking/refco...
1440
1441
  
  	retval = check_unsafe_exec(bprm);
8c652f96d   Oleg Nesterov   do_execve() must ...
1442
  	if (retval < 0)
a2a8474c3   Oleg Nesterov   exec: do not slee...
1443
  		goto out_free;
8c652f96d   Oleg Nesterov   do_execve() must ...
1444
  	clear_in_exec = retval;
a2a8474c3   Oleg Nesterov   exec: do not slee...
1445
  	current->in_execve = 1;
a6f76f23d   David Howells   CRED: Make execve...
1446

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1447
1448
1449
  	file = open_exec(filename);
  	retval = PTR_ERR(file);
  	if (IS_ERR(file))
498052bba   Al Viro   New locking/refco...
1450
  		goto out_unmark;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1451
1452
  
  	sched_exec();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1453
1454
1455
  	bprm->file = file;
  	bprm->filename = filename;
  	bprm->interp = filename;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1456

b6a2fea39   Ollie Wild   mm: variable leng...
1457
1458
1459
  	retval = bprm_mm_init(bprm);
  	if (retval)
  		goto out_file;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1460

b6a2fea39   Ollie Wild   mm: variable leng...
1461
  	bprm->argc = count(argv, MAX_ARG_STRINGS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1462
  	if ((retval = bprm->argc) < 0)
a6f76f23d   David Howells   CRED: Make execve...
1463
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1464

b6a2fea39   Ollie Wild   mm: variable leng...
1465
  	bprm->envc = count(envp, MAX_ARG_STRINGS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1466
  	if ((retval = bprm->envc) < 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
  		goto out;
  
  	retval = prepare_binprm(bprm);
  	if (retval < 0)
  		goto out;
  
  	retval = copy_strings_kernel(1, &bprm->filename, bprm);
  	if (retval < 0)
  		goto out;
  
  	bprm->exec = bprm->p;
  	retval = copy_strings(bprm->envc, envp, bprm);
  	if (retval < 0)
  		goto out;
  
  	retval = copy_strings(bprm->argc, argv, bprm);
  	if (retval < 0)
  		goto out;
  
  	retval = search_binary_handler(bprm,regs);
a6f76f23d   David Howells   CRED: Make execve...
1487
1488
  	if (retval < 0)
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1489

a6f76f23d   David Howells   CRED: Make execve...
1490
  	/* execve succeeded */
498052bba   Al Viro   New locking/refco...
1491
  	current->fs->in_exec = 0;
f9ce1f1cd   Kentaro Takeda   Add in_execve fla...
1492
  	current->in_execve = 0;
a6f76f23d   David Howells   CRED: Make execve...
1493
1494
1495
1496
1497
  	acct_update_integrals(current);
  	free_bprm(bprm);
  	if (displaced)
  		put_files_struct(displaced);
  	return retval;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1498

a6f76f23d   David Howells   CRED: Make execve...
1499
  out:
3c77f8457   Oleg Nesterov   exec: make argv/e...
1500
1501
1502
1503
  	if (bprm->mm) {
  		acct_arg_size(bprm, 0);
  		mmput(bprm->mm);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1504
1505
1506
1507
1508
1509
  
  out_file:
  	if (bprm->file) {
  		allow_write_access(bprm->file);
  		fput(bprm->file);
  	}
a6f76f23d   David Howells   CRED: Make execve...
1510

498052bba   Al Viro   New locking/refco...
1511
  out_unmark:
8c652f96d   Oleg Nesterov   do_execve() must ...
1512
1513
  	if (clear_in_exec)
  		current->fs->in_exec = 0;
f9ce1f1cd   Kentaro Takeda   Add in_execve fla...
1514
  	current->in_execve = 0;
a6f76f23d   David Howells   CRED: Make execve...
1515
1516
  
  out_free:
08a6fac1c   Al Viro   [PATCH] get rid o...
1517
  	free_bprm(bprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1518

fd8328be8   Al Viro   [PATCH] sanitize ...
1519
  out_files:
3b1253880   Al Viro   [PATCH] sanitize ...
1520
1521
  	if (displaced)
  		reset_files_struct(displaced);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1522
1523
1524
  out_ret:
  	return retval;
  }
ba2d01629   Oleg Nesterov   exec: introduce s...
1525
1526
1527
1528
1529
  /*
   * Native entry point: wrap the user-space argv/envp pointer arrays in
   * struct user_arg_ptr and hand everything to do_execve_common().
   */
  int do_execve(const char *filename,
  	const char __user *const __user *__argv,
  	const char __user *const __user *__envp,
  	struct pt_regs *regs)
  {
  	struct user_arg_ptr envp = { .ptr.native = __envp };
  	struct user_arg_ptr argv = { .ptr.native = __argv };
  
  	return do_execve_common(filename, argv, envp, regs);
  }
  
  #ifdef CONFIG_COMPAT
  /*
   * Compat entry point: same as do_execve(), but the argv/envp arrays hold
   * compat_uptr_t entries, so the wrappers are flagged with is_compat.
   */
  int compat_do_execve(char *filename,
  	compat_uptr_t __user *__argv,
  	compat_uptr_t __user *__envp,
  	struct pt_regs *regs)
  {
  	struct user_arg_ptr envp = {
  		.is_compat = true,
  		.ptr.compat = __envp,
  	};
  	struct user_arg_ptr argv = {
  		.is_compat = true,
  		.ptr.compat = __argv,
  	};
  
  	return do_execve_common(filename, argv, envp, regs);
  }
0e028465d   Oleg Nesterov   exec: unify do_ex...
1551
  #endif
ba2d01629   Oleg Nesterov   exec: introduce s...
1552

964ee7df9   Oleg Nesterov   exec: fix set_bin...
1553
  void set_binfmt(struct linux_binfmt *new)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1554
  {
801460d0c   Hiroshi Shimamoto   task_struct clean...
1555
1556
1557
1558
  	struct mm_struct *mm = current->mm;
  
  	if (mm->binfmt)
  		module_put(mm->binfmt->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1559

801460d0c   Hiroshi Shimamoto   task_struct clean...
1560
  	mm->binfmt = new;
964ee7df9   Oleg Nesterov   exec: fix set_bin...
1561
1562
  	if (new)
  		__module_get(new->module);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1563
1564
1565
  }
  
  EXPORT_SYMBOL(set_binfmt);
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
  /*
   * Resize cn->corename to CORENAME_MAX_SIZE times the freshly incremented
   * call_count.  On allocation failure the old buffer is freed, cn->corename
   * is left NULL and -ENOMEM is returned; otherwise returns 0.
   */
  static int expand_corename(struct core_name *cn)
  {
  	char *prev = cn->corename;
  	char *grown;
  
  	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
  	grown = krealloc(prev, cn->size, GFP_KERNEL);
  	cn->corename = grown;
  	if (grown)
  		return 0;
  
  	/* krealloc() failed: release the previous buffer ourselves. */
  	kfree(prev);
  	return -ENOMEM;
  }
  
  /*
   * Append printf-style formatted text to cn->corename, growing the buffer
   * as needed.
   *
   * Returns 0 on success (cn->used is advanced by the number of characters
   * written, the output is NUL-terminated), or the error returned by
   * expand_corename() if the buffer could not be grown.  After a failed
   * expansion cn->corename must not be dereferenced by the caller.
   */
  static int cn_printf(struct core_name *cn, const char *fmt, ...)
  {
  	char *cur;
  	int need;
  	int ret;
  	va_list arg;
  
  	/* Dry run to learn how many characters the output needs. */
  	va_start(arg, fmt);
  	need = vsnprintf(NULL, 0, fmt, arg);
  	va_end(arg);
  
  	/*
  	 * A single expansion is not guaranteed to make enough room (the
  	 * output may be much longer than one growth step), so keep
  	 * expanding until the text fits.  Every successful call to
  	 * expand_corename() uses a freshly incremented call_count, so
  	 * cn->size keeps increasing and the loop terminates.
  	 */
  	while (unlikely(need >= cn->size - cn->used - 1)) {
  		ret = expand_corename(cn);
  		if (ret)
  			return ret;
  	}
  
  	/* Recompute the write position: the buffer may have moved. */
  	cur = cn->corename + cn->used;
  	va_start(arg, fmt);
  	vsnprintf(cur, need + 1, fmt, arg);
  	va_end(arg);
  	cn->used += need;
  	return 0;
  }
2c563731f   Jiri Slaby   coredump: escape ...
1610
1611
1612
1613
1614
1615
  /* Replace every '/' in the NUL-terminated string @str with '!', in place. */
  static void cn_escape(char *str)
  {
  	char *p = str;
  
  	while (*p != '\0') {
  		if (*p == '/')
  			*p = '!';
  		p++;
  	}
  }
57cc083ad   Jiri Slaby   coredump: add sup...
1616
1617
1618
  static int cn_print_exe_file(struct core_name *cn)
  {
  	struct file *exe_file;
2c563731f   Jiri Slaby   coredump: escape ...
1619
  	char *pathbuf, *path;
57cc083ad   Jiri Slaby   coredump: add sup...
1620
1621
1622
  	int ret;
  
  	exe_file = get_mm_exe_file(current->mm);
2c563731f   Jiri Slaby   coredump: escape ...
1623
1624
1625
1626
1627
1628
  	if (!exe_file) {
  		char *commstart = cn->corename + cn->used;
  		ret = cn_printf(cn, "%s (path unknown)", current->comm);
  		cn_escape(commstart);
  		return ret;
  	}
57cc083ad   Jiri Slaby   coredump: add sup...
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
  
  	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
  	if (!pathbuf) {
  		ret = -ENOMEM;
  		goto put_exe_file;
  	}
  
  	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
  	if (IS_ERR(path)) {
  		ret = PTR_ERR(path);
  		goto free_buf;
  	}
2c563731f   Jiri Slaby   coredump: escape ...
1641
  	cn_escape(path);
57cc083ad   Jiri Slaby   coredump: add sup...
1642
1643
1644
1645
1646
1647
1648
1649
1650
  
  	ret = cn_printf(cn, "%s", path);
  
  free_buf:
  	kfree(pathbuf);
  put_exe_file:
  	fput(exe_file);
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1651
1652
1653
1654
  /* format_corename will inspect the pattern parameter, and output a
   * name into corename, which must have space for at least
   * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
   */
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1655
  static int format_corename(struct core_name *cn, long signr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1656
  {
86a264abe   David Howells   CRED: Wrap curren...
1657
  	const struct cred *cred = current_cred();
565b9b14e   Oleg Nesterov   coredump: format_...
1658
1659
  	const char *pat_ptr = core_pattern;
  	int ispipe = (*pat_ptr == '|');
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1660
  	int pid_in_pattern = 0;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1661
1662
1663
1664
1665
1666
1667
1668
  	int err = 0;
  
  	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
  	cn->corename = kmalloc(cn->size, GFP_KERNEL);
  	cn->used = 0;
  
  	if (!cn->corename)
  		return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1669
1670
1671
1672
1673
  
  	/* Repeat as long as we have more pattern to process and more output
  	   space */
  	while (*pat_ptr) {
  		if (*pat_ptr != '%') {
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1674
  			if (*pat_ptr == 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1675
  				goto out;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1676
  			err = cn_printf(cn, "%c", *pat_ptr++);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1677
1678
  		} else {
  			switch (*++pat_ptr) {
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1679
  			/* single % at the end, drop that */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1680
1681
1682
1683
  			case 0:
  				goto out;
  			/* Double percent, output one percent */
  			case '%':
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1684
  				err = cn_printf(cn, "%c", '%');
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1685
1686
1687
1688
  				break;
  			/* pid */
  			case 'p':
  				pid_in_pattern = 1;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1689
1690
  				err = cn_printf(cn, "%d",
  					      task_tgid_vnr(current));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1691
1692
1693
  				break;
  			/* uid */
  			case 'u':
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1694
  				err = cn_printf(cn, "%d", cred->uid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1695
1696
1697
  				break;
  			/* gid */
  			case 'g':
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1698
  				err = cn_printf(cn, "%d", cred->gid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1699
1700
1701
  				break;
  			/* signal that caused the coredump */
  			case 's':
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1702
  				err = cn_printf(cn, "%ld", signr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1703
1704
1705
1706
1707
  				break;
  			/* UNIX time of coredump */
  			case 't': {
  				struct timeval tv;
  				do_gettimeofday(&tv);
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1708
  				err = cn_printf(cn, "%lu", tv.tv_sec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1709
1710
1711
  				break;
  			}
  			/* hostname */
2c563731f   Jiri Slaby   coredump: escape ...
1712
1713
  			case 'h': {
  				char *namestart = cn->corename + cn->used;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1714
  				down_read(&uts_sem);
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1715
1716
  				err = cn_printf(cn, "%s",
  					      utsname()->nodename);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1717
  				up_read(&uts_sem);
2c563731f   Jiri Slaby   coredump: escape ...
1718
  				cn_escape(namestart);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1719
  				break;
2c563731f   Jiri Slaby   coredump: escape ...
1720
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1721
  			/* executable */
2c563731f   Jiri Slaby   coredump: escape ...
1722
1723
  			case 'e': {
  				char *commstart = cn->corename + cn->used;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1724
  				err = cn_printf(cn, "%s", current->comm);
2c563731f   Jiri Slaby   coredump: escape ...
1725
  				cn_escape(commstart);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1726
  				break;
2c563731f   Jiri Slaby   coredump: escape ...
1727
  			}
57cc083ad   Jiri Slaby   coredump: add sup...
1728
1729
1730
  			case 'E':
  				err = cn_print_exe_file(cn);
  				break;
74aadce98   Neil Horman   core_pattern: all...
1731
1732
  			/* core limit size */
  			case 'c':
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1733
1734
  				err = cn_printf(cn, "%lu",
  					      rlimit(RLIMIT_CORE));
74aadce98   Neil Horman   core_pattern: all...
1735
  				break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1736
1737
1738
1739
1740
  			default:
  				break;
  			}
  			++pat_ptr;
  		}
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1741
1742
1743
  
  		if (err)
  			return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1744
  	}
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1745

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1746
1747
1748
1749
  	/* Backward compatibility with core_uses_pid:
  	 *
  	 * If core_pattern does not include a %p (as is the default)
  	 * and core_uses_pid is set, then .%pid will be appended to
c4bbafda7   Alan Cox   exec.c: fix cored...
1750
  	 * the filename. Do not do this for piped commands. */
6409324b3   Oleg Nesterov   coredump: format_...
1751
  	if (!ispipe && !pid_in_pattern && core_uses_pid) {
1b0d300bd   Xiaotian Feng   core_pattern: fix...
1752
1753
1754
  		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
  		if (err)
  			return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1755
  	}
c4bbafda7   Alan Cox   exec.c: fix cored...
1756
  out:
c4bbafda7   Alan Cox   exec.c: fix cored...
1757
  	return ispipe;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1758
  }
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1759
  static int zap_process(struct task_struct *start, int exit_code)
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1760
1761
  {
  	struct task_struct *t;
8cd9c2491   Oleg Nesterov   coredump: simplif...
1762
  	int nr = 0;
281de339c   Oleg Nesterov   [PATCH] coredump:...
1763

d5f70c00a   Oleg Nesterov   [PATCH] coredump:...
1764
  	start->signal->flags = SIGNAL_GROUP_EXIT;
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1765
  	start->signal->group_exit_code = exit_code;
d5f70c00a   Oleg Nesterov   [PATCH] coredump:...
1766
  	start->signal->group_stop_count = 0;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1767
1768
1769
  
  	t = start;
  	do {
6dfca3298   Tejun Heo   job control: make...
1770
  		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1771
  		if (t != current && t->mm) {
281de339c   Oleg Nesterov   [PATCH] coredump:...
1772
1773
  			sigaddset(&t->pending.signal, SIGKILL);
  			signal_wake_up(t, 1);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1774
  			nr++;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1775
  		}
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1776
  	} while_each_thread(start, t);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1777
1778
  
  	return nr;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1779
  }
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1780
  static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
8cd9c2491   Oleg Nesterov   coredump: simplif...
1781
  				struct core_state *core_state, int exit_code)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1782
1783
  {
  	struct task_struct *g, *p;
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1784
  	unsigned long flags;
8cd9c2491   Oleg Nesterov   coredump: simplif...
1785
  	int nr = -EAGAIN;
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1786
1787
  
  	spin_lock_irq(&tsk->sighand->siglock);
ed5d2cac1   Oleg Nesterov   exec: rework the ...
1788
  	if (!signal_group_exit(tsk->signal)) {
8cd9c2491   Oleg Nesterov   coredump: simplif...
1789
  		mm->core_state = core_state;
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1790
  		nr = zap_process(tsk, exit_code);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1791
  	}
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1792
  	spin_unlock_irq(&tsk->sighand->siglock);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1793
1794
  	if (unlikely(nr < 0))
  		return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1795

8cd9c2491   Oleg Nesterov   coredump: simplif...
1796
  	if (atomic_read(&mm->mm_users) == nr + 1)
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1797
  		goto done;
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1798
1799
  	/*
  	 * We should find and kill all tasks which use this mm, and we should
999d9fc16   Oleg Nesterov   coredump: move mm...
1800
  	 * count them correctly into ->nr_threads. We don't take tasklist
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
  	 * lock, but this is safe wrt:
  	 *
  	 * fork:
  	 *	None of sub-threads can fork after zap_process(leader). All
  	 *	processes which were created before this point should be
  	 *	visible to zap_threads() because copy_process() adds the new
  	 *	process to the tail of init_task.tasks list, and lock/unlock
  	 *	of ->siglock provides a memory barrier.
  	 *
  	 * do_exit:
  	 *	The caller holds mm->mmap_sem. This means that the task which
  	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
  	 *	its ->mm.
  	 *
  	 * de_thread:
  	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
  	 *	we must see either old or new leader, this does not matter.
  	 *	However, it can change p->sighand, so lock_task_sighand(p)
  	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
  	 *	it can't fail.
  	 *
  	 *	Note also that "g" can be the old leader with ->mm == NULL
  	 *	and already unhashed and thus removed from ->thread_group.
  	 *	This is OK, __unhash_process()->list_del_rcu() does not
  	 *	clear the ->next pointer, we will find the new leader via
  	 *	next_thread().
  	 */
7b1c6154f   Oleg Nesterov   [PATCH] coredump:...
1828
  	rcu_read_lock();
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1829
  	for_each_process(g) {
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1830
1831
  		if (g == tsk->group_leader)
  			continue;
15b9f360c   Oleg Nesterov   coredump: zap_thr...
1832
1833
  		if (g->flags & PF_KTHREAD)
  			continue;
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1834
1835
1836
  		p = g;
  		do {
  			if (p->mm) {
15b9f360c   Oleg Nesterov   coredump: zap_thr...
1837
  				if (unlikely(p->mm == mm)) {
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1838
  					lock_task_sighand(p, &flags);
5c99cbf49   Oleg Nesterov   coredump: set ->g...
1839
  					nr += zap_process(p, exit_code);
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1840
1841
  					unlock_task_sighand(p, &flags);
  				}
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1842
1843
  				break;
  			}
e4901f92a   Oleg Nesterov   coredump: zap_thr...
1844
  		} while_each_thread(g, p);
aceecc041   Oleg Nesterov   [PATCH] coredump:...
1845
  	}
7b1c6154f   Oleg Nesterov   [PATCH] coredump:...
1846
  	rcu_read_unlock();
5debfa6da   Oleg Nesterov   [PATCH] coredump:...
1847
  done:
c5f1cc8c1   Oleg Nesterov   coredump: turn co...
1848
  	atomic_set(&core_state->nr_threads, nr);
8cd9c2491   Oleg Nesterov   coredump: simplif...
1849
  	return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1850
  }
9d5b327bf   Oleg Nesterov   coredump: make mm...
1851
  static int coredump_wait(int exit_code, struct core_state *core_state)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1852
  {
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1853
1854
  	struct task_struct *tsk = current;
  	struct mm_struct *mm = tsk->mm;
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1855
  	struct completion *vfork_done;
269b005a2   Oleg Nesterov   coredump: shift d...
1856
  	int core_waiters = -EBUSY;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1857

9d5b327bf   Oleg Nesterov   coredump: make mm...
1858
  	init_completion(&core_state->startup);
b564daf80   Oleg Nesterov   coredump: constru...
1859
1860
  	core_state->dumper.task = tsk;
  	core_state->dumper.next = NULL;
269b005a2   Oleg Nesterov   coredump: shift d...
1861
1862
1863
1864
  
  	down_write(&mm->mmap_sem);
  	if (!mm->core_state)
  		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
2384f55f8   Oleg Nesterov   [PATCH] coredump_...
1865
  	up_write(&mm->mmap_sem);
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
  	if (unlikely(core_waiters < 0))
  		goto fail;
  
  	/*
  	 * Make sure nobody is waiting for us to release the VM,
  	 * otherwise we can deadlock when we wait on each other
  	 */
  	vfork_done = tsk->vfork_done;
  	if (vfork_done) {
  		tsk->vfork_done = NULL;
  		complete(vfork_done);
  	}
2384f55f8   Oleg Nesterov   [PATCH] coredump_...
1878
  	if (core_waiters)
9d5b327bf   Oleg Nesterov   coredump: make mm...
1879
  		wait_for_completion(&core_state->startup);
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1880
  fail:
dcf560c59   Oleg Nesterov   [PATCH] coredump:...
1881
  	return core_waiters;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1882
  }
a94e2d408   Oleg Nesterov   coredump: kill mm...
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
  /*
   * Walk the list hanging off mm->core_state->dumper, clear each entry's
   * ->task and wake that task up, then detach core_state from @mm.
   */
  static void coredump_finish(struct mm_struct *mm)
  {
  	struct core_thread *curr, *next;
  	struct task_struct *task;
  
  	for (curr = mm->core_state->dumper.next; curr != NULL; curr = next) {
  		/* Fetch everything we need from curr before releasing it. */
  		next = curr->next;
  		task = curr->task;
  		/*
  		 * see exit_mm(), curr->task must not see
  		 * ->task == NULL before we read ->next.
  		 */
  		smp_mb();
  		curr->task = NULL;
  		wake_up_process(task);
  	}
  
  	mm->core_state = NULL;
  }
6c5d52382   Kawai, Hidehiro   coredump masking:...
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
  /*
   * set_dumpable converts traditional three-value dumpable to two flags and
   * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
   * these bits are not changed atomically.  So get_dumpable can observe the
   * intermediate state.  To avoid unexpected behavior, make get_dumpable
   * return either old dumpable or new one by paying attention to the order of
   * modifying the bits.
   *
   * dumpable |   mm->flags (binary)
   * old  new | initial interim  final
   * ---------+-----------------------
   *  0    1  |   00      01      01
   *  0    2  |   00      10(*)   11
   *  1    0  |   01      00      00
   *  1    2  |   01      11      11
   *  2    0  |   11      10(*)   00
   *  2    1  |   11      11      01
   *
   * (*) get_dumpable regards interim value of 10 as 11.
   */
  void set_dumpable(struct mm_struct *mm, int value)
  {
  	/*
  	 * The two bits are updated one at a time with a write barrier in
  	 * between; the order per value follows the table above.  Any value
  	 * other than 0, 1 or 2 leaves mm->flags untouched.
  	 */
  	if (value == 0) {
  		clear_bit(MMF_DUMPABLE, &mm->flags);
  		smp_wmb();
  		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
  	} else if (value == 1) {
  		set_bit(MMF_DUMPABLE, &mm->flags);
  		smp_wmb();
  		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
  	} else if (value == 2) {
  		set_bit(MMF_DUMP_SECURELY, &mm->flags);
  		smp_wmb();
  		set_bit(MMF_DUMPABLE, &mm->flags);
  	}
  }
6c5d52382   Kawai, Hidehiro   coredump masking:...
1943

30736a4d4   Masami Hiramatsu   coredump: pass mm...
1944
  static int __get_dumpable(unsigned long mm_flags)
6c5d52382   Kawai, Hidehiro   coredump masking:...
1945
1946
  {
  	int ret;
30736a4d4   Masami Hiramatsu   coredump: pass mm...
1947
  	ret = mm_flags & MMF_DUMPABLE_MASK;
6c5d52382   Kawai, Hidehiro   coredump masking:...
1948
1949
  	return (ret >= 2) ? 2 : ret;
  }
30736a4d4   Masami Hiramatsu   coredump: pass mm...
1950
1951
1952
1953
  /* Return the dumpable value (0, 1 or 2) encoded in @mm's flags. */
  int get_dumpable(struct mm_struct *mm)
  {
  	unsigned long mm_flags = mm->flags;
  
  	return __get_dumpable(mm_flags);
  }
61be228a0   Neil Horman   exec: allow do_co...
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
  /*
   * Wait on the pipe behind @file until no reader other than ourselves is
   * left, or until a signal is pending for the current task.
   */
  static void wait_for_dump_helpers(struct file *file)
  {
  	struct pipe_inode_info *pipe = file->f_path.dentry->d_inode->i_pipe;
  
  	pipe_lock(pipe);
  
  	/* For the duration of the wait, count ourselves as a reader, not a writer. */
  	pipe->readers++;
  	pipe->writers--;
  
  	for (;;) {
  		if (pipe->readers <= 1 || signal_pending(current))
  			break;
  		wake_up_interruptible_sync(&pipe->wait);
  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  		pipe_wait(pipe);
  	}
  
  	/* Restore the original reader/writer accounting. */
  	pipe->readers--;
  	pipe->writers++;
  	pipe_unlock(pipe);
  }
898b374af   Neil Horman   exec: replace cal...
1975
  /*
1bef82917   Holger Hans Peter Freyther   Small typo fix...
1976
   * umh_pipe_setup
898b374af   Neil Horman   exec: replace cal...
1977
1978
1979
1980
1981
1982
1983
1984
1985
   * helper function to customize the process used
   * to collect the core in userspace.  Specifically
   * it sets up a pipe and installs it as fd 0 (stdin)
   * for the process.  Returns 0 on success, or
   * PTR_ERR on failure.
   * Note that it also sets the core limit to 1.  This
   * is a special value that we use to trap recursive
   * core dumps
   */
879669961   David Howells   KEYS/DNS: Fix ___...
1986
  static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
898b374af   Neil Horman   exec: replace cal...
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
  {
  	struct file *rp, *wp;
  	struct fdtable *fdt;
  	struct coredump_params *cp = (struct coredump_params *)info->data;
  	struct files_struct *cf = current->files;
  
  	wp = create_write_pipe(0);
  	if (IS_ERR(wp))
  		return PTR_ERR(wp);
  
  	rp = create_read_pipe(wp, 0);
  	if (IS_ERR(rp)) {
  		free_write_pipe(wp);
  		return PTR_ERR(rp);
  	}
  
  	cp->file = wp;
  
  	sys_close(0);
  	fd_install(0, rp);
  	spin_lock(&cf->file_lock);
  	fdt = files_fdtable(cf);
  	FD_SET(0, fdt->open_fds);
  	FD_CLR(0, fdt->close_on_exec);
  	spin_unlock(&cf->file_lock);
  
  	/* and disallow core files too */
  	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
  
  	return 0;
  }
8cd3ac3ac   WANG Cong   fs/exec.c: make d...
2018
  void do_coredump(long signr, int exit_code, struct pt_regs *regs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2019
  {
9d5b327bf   Oleg Nesterov   coredump: make mm...
2020
  	struct core_state core_state;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
2021
  	struct core_name cn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2022
2023
  	struct mm_struct *mm = current->mm;
  	struct linux_binfmt * binfmt;
d84f4f992   David Howells   CRED: Inaugurate ...
2024
2025
  	const struct cred *old_cred;
  	struct cred *cred;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2026
  	int retval = 0;
d6e711448   Alan Cox   [PATCH] setuid co...
2027
  	int flag = 0;
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2028
  	int ispipe;
a293980c2   Neil Horman   exec: let do_core...
2029
  	static atomic_t core_dump_count = ATOMIC_INIT(0);
f6151dfea   Masami Hiramatsu   mm: introduce cor...
2030
2031
2032
  	struct coredump_params cprm = {
  		.signr = signr,
  		.regs = regs,
d554ed895   Jiri Slaby   fs: use rlimit he...
2033
  		.limit = rlimit(RLIMIT_CORE),
30736a4d4   Masami Hiramatsu   coredump: pass mm...
2034
2035
2036
2037
2038
2039
  		/*
  		 * We must use the same mm->flags while dumping core to avoid
  		 * inconsistency of bit flags, since this flag is not protected
  		 * by any locks.
  		 */
  		.mm_flags = mm->flags,
f6151dfea   Masami Hiramatsu   mm: introduce cor...
2040
  	};
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2041

0a4ff8c25   Steve Grubb   [PATCH] Abnormal ...
2042
  	audit_core_dumps(signr);
801460d0c   Hiroshi Shimamoto   task_struct clean...
2043
  	binfmt = mm->binfmt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2044
2045
  	if (!binfmt || !binfmt->core_dump)
  		goto fail;
269b005a2   Oleg Nesterov   coredump: shift d...
2046
2047
  	if (!__get_dumpable(cprm.mm_flags))
  		goto fail;
d84f4f992   David Howells   CRED: Inaugurate ...
2048
2049
  
  	cred = prepare_creds();
5e43aef53   Oleg Nesterov   coredump: factor ...
2050
  	if (!cred)
d84f4f992   David Howells   CRED: Inaugurate ...
2051
  		goto fail;
d6e711448   Alan Cox   [PATCH] setuid co...
2052
2053
2054
2055
2056
  	/*
  	 *	We cannot trust fsuid as being the "true" uid of the
  	 *	process nor do we know its entire history. We only know it
  	 *	was tainted so we dump it as root in mode 2.
  	 */
30736a4d4   Masami Hiramatsu   coredump: pass mm...
2057
2058
  	if (__get_dumpable(cprm.mm_flags) == 2) {
  		/* Setuid core dump mode */
d6e711448   Alan Cox   [PATCH] setuid co...
2059
  		flag = O_EXCL;		/* Stop rewrite attacks */
d84f4f992   David Howells   CRED: Inaugurate ...
2060
  		cred->fsuid = 0;	/* Dump root private */
d6e711448   Alan Cox   [PATCH] setuid co...
2061
  	}
1291cf416   Oleg Nesterov   [PATCH] fix de_th...
2062

9d5b327bf   Oleg Nesterov   coredump: make mm...
2063
  	retval = coredump_wait(exit_code, &core_state);
5e43aef53   Oleg Nesterov   coredump: factor ...
2064
2065
  	if (retval < 0)
  		goto fail_creds;
d84f4f992   David Howells   CRED: Inaugurate ...
2066
2067
  
  	old_cred = override_creds(cred);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2068
2069
2070
2071
2072
  
  	/*
  	 * Clear any false indication of pending signals that might
  	 * be seen by the filesystem code called to write the core file.
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2073
  	clear_thread_flag(TIF_SIGPENDING);
1b0d300bd   Xiaotian Feng   core_pattern: fix...
2074
  	ispipe = format_corename(&cn, signr);
c4bbafda7   Alan Cox   exec.c: fix cored...
2075
   	if (ispipe) {
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2076
2077
  		int dump_count;
  		char **helper_argv;
99b645674   Oleg Nesterov   do_coredump: fix ...
2078
2079
2080
2081
2082
2083
2084
  		if (ispipe < 0) {
  			printk(KERN_WARNING "format_corename failed
  ");
  			printk(KERN_WARNING "Aborting core
  ");
  			goto fail_corename;
  		}
898b374af   Neil Horman   exec: replace cal...
2085
  		if (cprm.limit == 1) {
725eae32d   Neil Horman   exec: make do_cor...
2086
2087
2088
  			/*
  			 * Normally core limits are irrelevant to pipes, since
  			 * we're not writing to the file system, but we use
898b374af   Neil Horman   exec: replace cal...
2089
2090
  			 * cprm.limit of 1 here as a speacial value. Any
  			 * non-1 limit gets set to RLIM_INFINITY below, but
725eae32d   Neil Horman   exec: make do_cor...
2091
2092
  			 * a limit of 0 skips the dump.  This is a consistent
  			 * way to catch recursive crashes.  We can still crash
898b374af   Neil Horman   exec: replace cal...
2093
  			 * if the core_pattern binary sets RLIM_CORE =  !1
725eae32d   Neil Horman   exec: make do_cor...
2094
2095
2096
2097
2098
2099
2100
  			 * but it runs as root, and can do lots of stupid things
  			 * Note that we use task_tgid_vnr here to grab the pid
  			 * of the process group leader.  That way we get the
  			 * right pid if a thread in a multi-threaded
  			 * core_pattern process dies.
  			 */
  			printk(KERN_WARNING
898b374af   Neil Horman   exec: replace cal...
2101
2102
  				"Process %d(%s) has RLIMIT_CORE set to 1
  ",
725eae32d   Neil Horman   exec: make do_cor...
2103
2104
2105
2106
2107
  				task_tgid_vnr(current), current->comm);
  			printk(KERN_WARNING "Aborting core
  ");
  			goto fail_unlock;
  		}
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2108
  		cprm.limit = RLIM_INFINITY;
725eae32d   Neil Horman   exec: make do_cor...
2109

a293980c2   Neil Horman   exec: let do_core...
2110
2111
2112
2113
2114
2115
2116
2117
2118
  		dump_count = atomic_inc_return(&core_dump_count);
  		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
  			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit
  ",
  			       task_tgid_vnr(current), current->comm);
  			printk(KERN_WARNING "Skipping core dump
  ");
  			goto fail_dropcount;
  		}
1b0d300bd   Xiaotian Feng   core_pattern: fix...
2119
  		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
350eaf791   Tetsuo Handa   do_coredump(): ch...
2120
2121
2122
2123
  		if (!helper_argv) {
  			printk(KERN_WARNING "%s failed to allocate memory
  ",
  			       __func__);
a293980c2   Neil Horman   exec: let do_core...
2124
  			goto fail_dropcount;
350eaf791   Tetsuo Handa   do_coredump(): ch...
2125
  		}
323211371   Neil Horman   core_pattern: fix...
2126

d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2127
2128
2129
2130
2131
  		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
  					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
  					NULL, &cprm);
  		argv_free(helper_argv);
  		if (retval) {
d025c9db7   Andi Kleen   [PATCH] Support p...
2132
2133
   			printk(KERN_INFO "Core dump to %s pipe failed
  ",
1b0d300bd   Xiaotian Feng   core_pattern: fix...
2134
  			       cn.corename);
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2135
  			goto close_fail;
d025c9db7   Andi Kleen   [PATCH] Support p...
2136
   		}
c71354112   Oleg Nesterov   coredump: factor ...
2137
2138
2139
2140
2141
  	} else {
  		struct inode *inode;
  
  		if (cprm.limit < binfmt->min_coredump)
  			goto fail_unlock;
1b0d300bd   Xiaotian Feng   core_pattern: fix...
2142
  		cprm.file = filp_open(cn.corename,
6d4df677f   Alexey Dobriyan   [PATCH] do_coredu...
2143
2144
  				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
  				 0600);
c71354112   Oleg Nesterov   coredump: factor ...
2145
2146
  		if (IS_ERR(cprm.file))
  			goto fail_unlock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2147

c71354112   Oleg Nesterov   coredump: factor ...
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
  		inode = cprm.file->f_path.dentry->d_inode;
  		if (inode->i_nlink > 1)
  			goto close_fail;
  		if (d_unhashed(cprm.file->f_path.dentry))
  			goto close_fail;
  		/*
  		 * AK: actually i see no reason to not allow this for named
  		 * pipes etc, but keep the previous behaviour for now.
  		 */
  		if (!S_ISREG(inode->i_mode))
  			goto close_fail;
  		/*
  		 * Dont allow local users get cute and trick others to coredump
  		 * into their pre-created files.
  		 */
  		if (inode->i_uid != current_fsuid())
  			goto close_fail;
  		if (!cprm.file->f_op || !cprm.file->f_op->write)
  			goto close_fail;
  		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
  			goto close_fail;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2170

c71354112   Oleg Nesterov   coredump: factor ...
2171
  	retval = binfmt->core_dump(&cprm);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2172
2173
  	if (retval)
  		current->signal->group_exit_code |= 0x80;
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2174

61be228a0   Neil Horman   exec: allow do_co...
2175
  	if (ispipe && core_pipe_limit)
f6151dfea   Masami Hiramatsu   mm: introduce cor...
2176
  		wait_for_dump_helpers(cprm.file);
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2177
2178
2179
  close_fail:
  	if (cprm.file)
  		filp_close(cprm.file, NULL);
a293980c2   Neil Horman   exec: let do_core...
2180
  fail_dropcount:
d5bf4c4f5   Oleg Nesterov   coredump: cleanup...
2181
  	if (ispipe)
a293980c2   Neil Horman   exec: let do_core...
2182
  		atomic_dec(&core_dump_count);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2183
  fail_unlock:
1b0d300bd   Xiaotian Feng   core_pattern: fix...
2184
2185
  	kfree(cn.corename);
  fail_corename:
5e43aef53   Oleg Nesterov   coredump: factor ...
2186
  	coredump_finish(mm);
d84f4f992   David Howells   CRED: Inaugurate ...
2187
  	revert_creds(old_cred);
5e43aef53   Oleg Nesterov   coredump: factor ...
2188
  fail_creds:
d84f4f992   David Howells   CRED: Inaugurate ...
2189
  	put_cred(cred);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2190
  fail:
8cd3ac3ac   WANG Cong   fs/exec.c: make d...
2191
  	return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2192
  }
3aa0ce825   Linus Torvalds   Un-inline the cor...
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
  
  /*
   * Core dumping helper functions.  These are the only things you should
   * do on a core-file: use only these functions to write out all the
   * necessary info.
   */
  /*
   * Write @nr bytes starting at @addr to the core file @file at its
   * current position.
   *
   * Returns 1 when @addr passes access_ok() and the file's write method
   * reports exactly @nr bytes written, 0 otherwise.
   */
  int dump_write(struct file *file, const void *addr, int nr)
  {
  	if (!access_ok(VERIFY_READ, addr, nr))
  		return 0;
  
  	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
  }
8fd01d6cf   Linus Torvalds   Export dump_{writ...
2203
  EXPORT_SYMBOL(dump_write);
3aa0ce825   Linus Torvalds   Un-inline the cor...
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
  
  /*
   * Advance the position of the core file @file by @off bytes.
   *
   * When the file has a usable llseek method, a relative seek is done.
   * Otherwise the gap is filled by writing zeroes, at most one page per
   * dump_write() call.
   *
   * Returns 1 on success and 0 on failure (seek error, failed page
   * allocation or failed write).
   */
  int dump_seek(struct file *file, loff_t off)
  {
  	int ret = 1;
  
  	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
  		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
  			return 0;
  	} else {
  		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
  
  		if (!buf)
  			return 0;
  		while (off > 0) {
  			unsigned long n = PAGE_SIZE;
  
  			/*
  			 * Clamp while still in loff_t.  Narrowing @off to
  			 * unsigned long first would drop its high bits where
  			 * unsigned long is 32 bits wide: a multiple of 4GiB
  			 * would become a chunk of 0 bytes and the loop would
  			 * never make progress.
  			 */
  			if (off < (loff_t)PAGE_SIZE)
  				n = off;
  			if (!dump_write(file, buf, n)) {
  				ret = 0;
  				break;
  			}
  			off -= n;
  		}
  		free_page((unsigned long)buf);
  	}
  	return ret;
  }
8fd01d6cf   Linus Torvalds   Export dump_{writ...
2232
  EXPORT_SYMBOL(dump_seek);