// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 */
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);
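
/*
 * Illustrative sketch (editor's addition, not in the original file): for a
 * private readable mapping, vm_flags contains just VM_READ (0x1), so the
 * lookup selects the MAP_PRIVATE/PROT_READ entry of the table above:
 *
 *	pgprot_t prot = vm_get_page_prot(VM_READ);	// protection_map[1] == __P001
 *
 * arch_vm_get_page_prot() and arch_filter_pgprot() are identity hooks
 * unless the architecture overrides them.
 */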

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}
	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
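
/*
 * Illustrative note (editor's addition, not in the original file):
 * vma_wants_writenotify() returns true when writes must be intercepted,
 * e.g. for a MAP_SHARED file mapping whose filesystem implements
 * page_mkwrite(), or when soft-dirty tracking is in use. Dropping
 * VM_SHARED for the protection lookup makes the PTEs read-only, so the
 * first write faults and can be notified and accounted before it proceeds.
 */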

/*
 * Requires inode->i_mapping->i_mmap_rwsem
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		allow_write_access(file);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	vm_area_free(vma);
	return next;
}

static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
		struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk, origbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	bool downgraded = false;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	origbrk = mm->brk;
#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the test
	 * of oldbrk with newbrk then it can escape the test and let the data
	 * segment grow beyond its set limit in the case where the limit is
	 * not page aligned -Ram Gupta
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk) {
		mm->brk = brk;
		goto success;
	}

	/*
	 * Always allow shrinking brk.
	 * __do_munmap() may downgrade mmap_lock to read.
	 */
	if (brk <= mm->brk) {
		int ret;

		/*
		 * mm->brk must be protected by write mmap_lock so update it
		 * before downgrading mmap_lock. When __do_munmap() fails,
		 * mm->brk will be restored from origbrk.
		 */
		mm->brk = brk;
		ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
		if (ret < 0) {
			mm->brk = origbrk;
			goto out;
		} else if (ret == 1) {
			downgraded = true;
		}
		goto success;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
		goto out;
	mm->brk = brk;

success:
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	if (downgraded)
		mmap_read_unlock(mm);
	else
		mmap_write_unlock(mm);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = origbrk;
	mmap_write_unlock(mm);
	return retval;
}
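
/*
 * Worked example (editor's addition): with 4K pages, a task whose break is
 * 0x10000804 calling brk(0x10000810) stays within one page: PAGE_ALIGN()
 * maps both values to 0x10001000, so oldbrk == newbrk and only mm->brk is
 * updated, with no mapping change. A call that shrinks the break by whole
 * pages goes through __do_munmap() instead, which may downgrade the mmap
 * write lock to a read lock (ret == 1) so the unmap finishes without
 * excluding readers of the mm.
 */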

static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
{
	unsigned long gap, prev_end;

	/*
	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
	 * allow two stack_guard_gaps between them here, and when choosing
	 * an unmapped area; whereas when expanding we only require one.
	 * That's a little inconsistent, but keeps the code here simpler.
	 */
	gap = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (gap > prev_end)
			gap -= prev_end;
		else
			gap = 0;
	}
	return gap;
}
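
/*
 * Worked example (editor's addition): with prev spanning [0x8000, 0xa000)
 * and this vma starting at 0xf000, gap = 0xf000 - 0xa000 = 0x5000 free
 * bytes in front of the vma. vm_start_gap()/vm_end_gap() only differ from
 * vm_start/vm_end for VM_GROWSDOWN/VM_GROWSUP vmas, where they are widened
 * by stack_guard_gap.
 */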
  
#ifdef CONFIG_DEBUG_VM_RB
static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max = vma_compute_gap(vma), subtree_gap;

	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}

static int browse_rb(struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			pr_emerg("vm_start %lx < prev %lx\n",
				  vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			pr_emerg("vm_start %lx < pend %lx\n",
				  vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			pr_emerg("vm_start %lx > vm_end %lx\n",
				  vma->vm_start, vma->vm_end);
			bug = 1;
		}
		spin_lock(&mm->page_table_lock);
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			pr_emerg("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		spin_unlock(&mm->page_table_lock);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
	if (i != j) {
		pr_emerg("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}

static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}

static void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		struct anon_vma *anon_vma = vma->anon_vma;
		struct anon_vma_chain *avc;

		if (anon_vma) {
			anon_vma_lock_read(anon_vma);
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				anon_vma_interval_tree_verify(avc);
			anon_vma_unlock_read(anon_vma);
		}
		highest_address = vm_end_gap(vma);
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
			  mm->highest_vm_end, highest_address);
		bug = 1;
	}
	i = browse_rb(mm);
	if (i != mm->map_count) {
		if (i != -1)
			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	VM_BUG_ON_MM(bug, mm);
}
#else
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif

RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
			 struct vm_area_struct, vm_rb,
			 unsigned long, rb_subtree_gap, vma_compute_gap)

/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_prev->vm_end values changed, without modifying the vma's position
 * in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
	 * a callback function that does exactly what we want.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * Note rb_erase_augmented is a fairly large inline function,
	 * so make sure we instantiate it only once with our desired
	 * augmented rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct rb_root *root,
						struct vm_area_struct *ignore)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of
	 *
	 * a. the "next" vma being erased if next->vm_start was reduced in
	 *    __vma_adjust() -> __vma_unlink()
	 * b. the vma being erased in detach_vmas_to_be_unmapped() ->
	 *    vma_rb_erase()
	 */
	validate_mm_rb(root, ignore);

	__vma_rb_erase(vma, root);
}

static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct rb_root *root)
{
	vma_rb_erase_ignore(vma, root, vma);
}

/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's mutex.
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
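
/*
 * Canonical usage sketch (editor's addition), mirroring __vma_adjust();
 * new_start/new_pgoff are hypothetical values:
 *
 *	anon_vma_lock_write(anon_vma);
 *	anon_vma_interval_tree_pre_update_vma(vma);
 *	vma->vm_start = new_start;
 *	vma->vm_pgoff = new_pgoff;
 *	anon_vma_interval_tree_post_update_vma(vma);
 *	anon_vma_unlock_write(anon_vma);
 */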

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}
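
/*
 * Usage sketch (editor's addition), simplified from the insertion paths
 * below: find_vma_links() both checks for overlap and returns the links
 * needed to splice in a new vma.
 *
 *	struct vm_area_struct *prev;
 *	struct rb_node **rb_link, *rb_parent;
 *
 *	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
 *		return -ENOMEM;		// range still occupied
 *	vma_link(mm, vma, prev, rb_link, rb_parent);
 */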

/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma.
 *
 * If @vma is NULL, return the first vma in the mm.
 *
 * Returns: The next VMA after @vma.
 */
static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
					 struct vm_area_struct *vma)
{
	if (!vma)
		return mm->mmap;

	return vma->vm_next;
}

/*
 * munmap_vma_range() - munmap VMAs that overlap a range.
 * @mm: The mm struct
 * @start: The start of the range.
 * @len: The length of the range.
 * @pprev: pointer to the pointer that will be set to previous vm_area_struct
 * @rb_link: the rb_node
 * @rb_parent: the parent rb_node
 *
 * Find all the vm_area_struct that overlap from @start to
 * @start + @len and munmap them.  Set @pprev to the previous vm_area_struct.
 *
 * Returns: -ENOMEM on munmap failure or 0 on success.
 */
static inline int
munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
		 struct vm_area_struct **pprev, struct rb_node ***link,
		 struct rb_node **parent, struct list_head *uf)
{
	while (find_vma_links(mm, start, start + len, pprev, link, parent))
		if (do_munmap(mm, start, len, uf))
			return -ENOMEM;

	return 0;
}
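
/*
 * Usage sketch (editor's addition): mmap_region() clears whatever was
 * mapped in [addr, addr + len) before linking the new vma:
 *
 *	if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
 *		return -ENOMEM;
 */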

static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* Find first overlapping mapping */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* Iterate over the rest of the overlaps */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}
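
/*
 * Worked example (editor's addition): with 4K pages and vmas spanning
 * [0x1000, 0x3000) and [0x5000, 0x8000), count_vma_pages_range(mm, 0x2000,
 * 0x6000) counts (0x3000 - 0x2000) + (0x6000 - 0x5000) = 2 pages; the
 * unmapped gap in between contributes nothing.
 */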

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree to find the
	 * correct insertion point for that vma. As a result, we could not
	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
	 * So, we first insert the vma with a zero rb_subtree_gap value
	 * (to be consistent with what we did on the way down), and then
	 * immediately update the gap to the correct value. Finally we
	 * rebalance the rbtree after all augmented values have been set.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			put_write_access(file_inode(file));
		if (vma->vm_flags & VM_SHARED)
			mapping_allow_writable(mapping);

		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		i_mmap_lock_write(mapping);
	}

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	if (mapping)
		i_mmap_unlock_write(mapping);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
 * mm's list and rbtree.  It has already been inserted into the interval tree.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		BUG();
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static __always_inline void __vma_unlink(struct mm_struct *mm,
						struct vm_area_struct *vma,
						struct vm_area_struct *ignore)
{
	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
	__vma_unlink_list(mm, vma);
	/* Kill the cache */
	vmacache_invalidate(mm);
}

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
	struct vm_area_struct *expand)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
	struct address_space *mapping = NULL;
	struct rb_root_cached *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	bool start_changed = false, end_changed = false;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL, *importer = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma expands, overlapping all the next, and
			 * perhaps the one after too (mprotect case 6).
			 * The only other cases that get here are
			 * case 1, case 7 and case 8.
			 */
			if (next == expand) {
				/*
				 * The only case where we don't expand "vma"
				 * and we expand "next" instead is case 8.
				 */
				VM_WARN_ON(end != next->vm_end);
				/*
				 * remove_next == 3 means we're
				 * removing "vma" and that to do so we
				 * swapped "vma" and "next".
				 */
				remove_next = 3;
				VM_WARN_ON(file != next->vm_file);
				swap(vma, next);
			} else {
				VM_WARN_ON(expand != vma);
				/*
				 * case 1, 6, 7, remove_next == 2 is case 6,
				 * remove_next == 1 is case 1 or 7.
				 */
				remove_next = 1 + (end > next->vm_end);
				VM_WARN_ON(remove_next == 2 &&
					   end != next->vm_next->vm_end);
				/* trim end to next, for case 6 first pass */
				end = next->vm_end;
			}

			exporter = next;
			importer = vma;

			/*
			 * If next doesn't have anon_vma, import from vma after
			 * next, if the vma overlaps with it.
			 */
			if (remove_next == 2 && !next->anon_vma)
				exporter = next->vm_next;

		} else if (end > next->vm_start) {
			/*
			 * vma expands, overlapping part of the next:
			 * mprotect case 5 shifting the boundary up.
			 */
			adjust_next = (end - next->vm_start);
			exporter = next;
			importer = vma;
			VM_WARN_ON(expand != importer);
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks, and !insert tells it's not
			 * split_vma inserting another: so it must be
			 * mprotect case 4 shifting the boundary down.
			 */
			adjust_next = -(vma->vm_end - end);
			exporter = vma;
			importer = next;
			VM_WARN_ON(expand != importer);
		}

		/*
		 * Easily overlooked: when mprotect shifts the boundary,
		 * make sure the expanding vma has anon_vma set if the
		 * shrinking vma had, to cover any anon pages imported.
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			int error;

			importer->anon_vma = exporter->anon_vma;
			error = anon_vma_clone(importer, exporter);
			if (error)
				return error;
		}
	}
again:
	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);

	if (file) {
		mapping = file->f_mapping;
		root = &mapping->i_mmap;
		uprobe_munmap(vma, vma->vm_start, vma->vm_end);

		if (adjust_next)
			uprobe_munmap(next, next->vm_start, next->vm_end);

		i_mmap_lock_write(mapping);
		if (insert) {
			/*
			 * Put into interval tree now, so instantiated pages
			 * are visible to arm/parisc __flush_dcache_page
			 * throughout; but we cannot insert into address
			 * space until vma start or end is updated.
			 */
			__vma_link_file(insert);
		}
	}

	anon_vma = vma->anon_vma;
	if (!anon_vma && adjust_next)
		anon_vma = next->anon_vma;
	if (anon_vma) {
		VM_WARN_ON(adjust_next && next->anon_vma &&
			   anon_vma != next->anon_vma);
		anon_vma_lock_write(anon_vma);
		anon_vma_interval_tree_pre_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_pre_update_vma(next);
	}

	if (file) {
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, root);
		if (adjust_next)
			vma_interval_tree_remove(next, root);
	}

	if (start != vma->vm_start) {
		vma->vm_start = start;
		start_changed = true;
	}
	if (end != vma->vm_end) {
		vma->vm_end = end;
		end_changed = true;
	}
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next;
		next->vm_pgoff += adjust_next >> PAGE_SHIFT;
	}

	if (file) {
		if (adjust_next)
			vma_interval_tree_insert(next, root);
		vma_interval_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma_merge has merged next into vma, and needs
		 * us to remove next before dropping the locks.
		 */
		if (remove_next != 3)
			__vma_unlink(mm, next, next);
		else
			/*
			 * vma is not before next if they've been
			 * swapped.
			 *
			 * pre-swap() next->vm_start was reduced so
			 * tell validate_mm_rb to ignore pre-swap()
			 * "next" (which is stored in post-swap()
			 * "vma").
			 */
			__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * split_vma has split insert from vma, and needs
		 * us to insert it before dropping the locks
		 * (it may either follow vma or precede it).
		 */
		__insert_vm_struct(mm, insert);
	} else {
		if (start_changed)
			vma_gap_update(vma);
		if (end_changed) {
			if (!next)
				mm->highest_vm_end = vm_end_gap(vma);
			else if (!adjust_next)
				vma_gap_update(next);
		}
	}

	if (anon_vma) {
		anon_vma_interval_tree_post_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_post_update_vma(next);
		anon_vma_unlock_write(anon_vma);
	}

	if (file) {
		i_mmap_unlock_write(mapping);
		uprobe_mmap(vma);

		if (adjust_next)
			uprobe_mmap(next);
	}

	if (remove_next) {
		if (file) {
			uprobe_munmap(next, next->vm_start, next->vm_end);
			fput(file);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		vm_area_free(next);
		/*
		 * In mprotect's case 6 (see comments on vma_merge),
		 * we must remove another next too. It would clutter
		 * up the code too much to do both in one go.
		 */
		if (remove_next != 3) {
			/*
			 * If "next" was removed and vma->vm_end was
			 * expanded (up) over it, in turn
			 * "next->vm_prev->vm_end" changed and the
			 * "vma->vm_next" gap must be updated.
			 */
			next = vma->vm_next;
		} else {
			/*
			 * For the scope of the comment "next" and
			 * "vma" considered pre-swap(): if "vma" was
			 * removed, next->vm_start was expanded (down)
			 * over it and the "next" gap must be updated.
			 * Because of the swap() the post-swap() "vma"
			 * actually points to pre-swap() "next"
			 * (post-swap() "next" as opposed is now a
			 * dangling pointer).
			 */
			next = vma;
		}
		if (remove_next == 2) {
			remove_next = 1;
			end = next->vm_end;
			goto again;
		}
		else if (next)
			vma_gap_update(next);
		else {
			/*
			 * If remove_next == 2 we obviously can't
			 * reach this path.
			 *
			 * If remove_next == 3 we can't reach this
			 * path because pre-swap() next is always not
			 * NULL. pre-swap() "next" is not being
			 * removed and its next->vm_end is not altered
			 * (and furthermore "end" already matches
			 * next->vm_end in remove_next == 3).
			 *
			 * We reach this only in the remove_next == 1
			 * case if the "next" vma that was removed was
			 * the highest vma of the mm. However in such
			 * case next->vm_end == "end" and the extended
			 * "vma" has vma->vm_end == next->vm_end so
			 * mm->highest_vm_end doesn't need any update
			 * in remove_next == 1 case.
			 */
			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
		}
	}
	if (insert && file)
		uprobe_mmap(insert);

	validate_mm(mm);

	return 0;
}
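
/*
 * Usage sketch (editor's addition): __split_vma() shrinks "vma" to
 * [vm_start, addr) and inserts the freshly prepared "new" vma in a single
 * call through the vma_adjust() wrapper (insert = new, expand = NULL):
 *
 *	err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 */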
  
/*
 * If the vma has a ->close operation then the driver probably needs to release
 * per-vma resources, so we don't attempt to merge those.
 */
static inline int is_mergeable_vma(struct vm_area_struct *vma,
				struct file *file, unsigned long vm_flags,
				struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
				const char __user *anon_name)
{
	/*
	 * VM_SOFTDIRTY should not prevent VMA merging, if we
	 * match the flags but dirty bit -- the caller should mark
	 * merged VMA as dirty. If the dirty bit won't be excluded from
	 * the comparison, we increase pressure on the memory system forcing
	 * the kernel to generate new VMAs when old one could be
	 * extended instead.
	 */
	if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
		return 0;
	if (vma_get_anon_name(vma) != anon_name)
		return 0;
	return 1;
}
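
/*
 * Worked example (editor's addition): vm_flags of VM_READ|VM_WRITE|VM_SOFTDIRTY
 * on the vma against a request of VM_READ|VM_WRITE differ only in
 * VM_SOFTDIRTY, so (vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY == 0 and the
 * flags are treated as mergeable; any other differing bit defeats the merge.
 */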
  
static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2,
					struct vm_area_struct *vma)
{
	/*
	 * The list_is_singular() test is to avoid merging VMA cloned from
	 * parents. This can improve scalability by avoiding contention on
	 * the anon_vma lock.
	 */
	if ((!anon_vma1 || !anon_vma2) && (!vma ||
		list_is_singular(&vma->anon_vma_chain)))
		return 1;
	return anon_vma1 == anon_vma2;
}
  
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of pagecache
 * indices (16TB on ia32) because do_mmap() does not permit mmap's which
 * wrap, nor mmaps which cover the final page at index -1UL.
 */
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
		     struct anon_vma *anon_vma, struct file *file,
		     pgoff_t vm_pgoff,
		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
		     const char __user *anon_name)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}
  
  /*
   * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
   * beyond (at a higher virtual address and file offset than) the vma.
   *
   * We cannot merge two vmas if they have differently assigned (non-NULL)
   * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
   */
  static int
  can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
		    struct anon_vma *anon_vma, struct file *file,
		    pgoff_t vm_pgoff,
		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
		    const char __user *anon_name)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		pgoff_t vm_pglen;
		vm_pglen = vma_pages(vma);
  		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
  			return 1;
  	}
  	return 0;
  }
  
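/*
 * Worked example (editor's sketch, not in the original source): take a
 * file-backed vma covering [0x1000, 0x4000) with vma->vm_pgoff == 2.
 * Then vm_pglen == vma_pages(vma) == 3 (with 4KB pages), so a request
 * starting at 0x4000 can merge after this vma only when its vm_pgoff is
 * 2 + 3 == 5, i.e. the file offsets are contiguous as well as the
 * virtual addresses.
 */
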
  /*
 * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
 * figure out whether that can be merged with its predecessor or its
 * successor.  Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                   AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN       PPPPPPNNNNNN
 *    cannot merge    might become       might become
 *                    PPNNNNNNNNNN       PPPPPPPPPPNN
 *    mmap, brk or    case 4 below       case 5 below
 *    mremap move:
 *                        AAAA               AAAA
 *                    PPPP    NNNN       PPPPNNNNXXXX
 *                    might become       might become
 *                    PPPPPPPPPPPP 1 or  PPPPPPPPPPPP 6 or
 *                    PPPPPPPPNNNN 2 or  PPPPPPPPXXXX 7 or
 *                    PPPPNNNNNNNN 3     PPPPXXXXXXXX 8
 *
 * It is important for case 8 that the vma NNNN overlapping the
 * region AAAA is never going to be extended over XXXX. Instead XXXX must
 * be extended in region AAAA and NNNN must be removed. This way in
 * all cases where vma_merge succeeds, the moment vma_adjust drops the
 * rmap_locks, the properties of the merged vma will already be
 * correct for the whole merged range. Some of those properties like
 * vm_page_prot/vm_flags may be accessed by rmap_walks and they must
 * be correct for the whole merged range immediately after the
 * rmap_locks are released. Otherwise, if XXXX were removed and
 * NNNN extended over the XXXX range, remove_migration_ptes
 * or other rmap walkers (if working on addresses beyond the "end"
 * parameter) might establish ptes with the wrong permissions of NNNN
 * instead of the right permissions of XXXX.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy,
			struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
			const char __user *anon_name)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;
  
  	/*
  	 * We later require that vma->vm_flags == vm_flags,
  	 * so this tests vma->vm_flags & VM_SPECIAL, too.
  	 */
  	if (vm_flags & VM_SPECIAL)
  		return NULL;
	next = vma_next(mm, prev);
	area = next;
	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
		next = next->vm_next;
	/* verify some invariants that must be enforced by the caller */
	VM_WARN_ON(prev && addr <= prev->vm_start);
	VM_WARN_ON(area && end > area->vm_end);
	VM_WARN_ON(addr >= end);
	/*
	 * Can it merge with the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
					    anon_vma, file, pgoff,
					    vm_userfaultfd_ctx,
					    anon_name)) {
		/*
		 * OK, it can.  Can we now merge in the successor as well?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
						     anon_vma, file,
						     pgoff+pglen,
						     vm_userfaultfd_ctx,
						     anon_name) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma, NULL)) {
							/* cases 1, 6 */
			err = __vma_adjust(prev, prev->vm_start,
					 next->vm_end, prev->vm_pgoff, NULL,
					 prev);
		} else					/* cases 2, 5, 7 */
			err = __vma_adjust(prev, prev->vm_start,
					 end, prev->vm_pgoff, NULL, prev);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(prev, vm_flags);
		return prev;
	}

	/*
	 * Can this new request be merged in front of next?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					     anon_vma, file, pgoff+pglen,
					     vm_userfaultfd_ctx,
					     anon_name)) {
		if (prev && addr < prev->vm_end)	/* case 4 */
			err = __vma_adjust(prev, prev->vm_start,
					 addr, prev->vm_pgoff, NULL, next);
		else {					/* cases 3, 8 */
			err = __vma_adjust(area, addr, next->vm_end,
					 next->vm_pgoff - pglen, NULL, next);
			/*
			 * In case 3 area is already equal to next and
			 * this is a noop, but in case 8 "area" has
			 * been removed and next was expanded over it.
			 */
			area = next;
		}
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(area, vm_flags);
  		return area;
  	}
  
  	return NULL;
  }
  
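/*
 * Worked example (editor's sketch, not in the original source): an
 * mprotect() over AAAA where prev ends exactly at addr, next starts
 * exactly at end, and flags, policies and anon_vmas are compatible on
 * both sides is case 1 above: __vma_adjust() extends prev over
 * [addr, next->vm_end), next is removed, and the single vma "prev" is
 * returned to the caller.
 */
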
  /*
   * Rough compatibility check to quickly see if it's even worth looking
   * at sharing an anon_vma.
   *
   * They need to have the same vm_file, and the flags can only differ
   * in things that mprotect may change.
   *
   * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
   * we can merge the two vma's. For example, we refuse to merge a vma if
   * there is a vm_ops->close() function, because that indicates that the
   * driver is doing some kind of reference counting. But that doesn't
   * really matter for the anon_vma sharing case.
   */
  static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
  {
  	return a->vm_end == b->vm_start &&
  		mpol_equal(vma_policy(a), vma_policy(b)) &&
  		a->vm_file == b->vm_file &&
  		!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
  		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
  }
  
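/*
 * Worked example (editor's sketch, not in the original source): for
 * a == [0x1000, 0x3000) with a->vm_pgoff == 0 and b starting at 0x3000,
 * the last test requires b->vm_pgoff == 0 + ((0x3000 - 0x1000) >> PAGE_SHIFT),
 * i.e. page 2 of the same file, so the file contents line up exactly as
 * if a and b were one mapping.
 */
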
  /*
   * Do some basic sanity checking to see if we can re-use the anon_vma
   * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
   * the same as 'old', the other will be the new one that is trying
   * to share the anon_vma.
   *
   * NOTE! This runs with mm_sem held for reading, so it is possible that
   * the anon_vma of 'old' is concurrently in the process of being set up
   * by another page fault trying to merge _that_. But that's ok: if it
   * is being set up, that automatically means that it will be a singleton
   * acceptable for merging, so we can do all of this optimistically. But
   * we do that READ_ONCE() to make sure that we never re-load the pointer.
   *
   * IOW: that the "list_is_singular()" test on the anon_vma_chain only
   * matters for the 'stable anon_vma' case (ie the thing we want to avoid
   * is to return an anon_vma that is "complex" due to having gone through
   * a fork).
   *
   * We also make sure that the two vma's are compatible (adjacent,
   * and with the same memory policies). That's all stable, even with just
   * a read lock on the mm_sem.
   */
  static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
  {
  	if (anon_vma_compatible(a, b)) {
  		struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
  
  		if (anon_vma && list_is_singular(&old->anon_vma_chain))
  			return anon_vma;
  	}
  	return NULL;
  }
  
  /*
   * find_mergeable_anon_vma is used by anon_vma_prepare, to check
   * neighbouring vmas for a suitable anon_vma, before it goes off
   * to allocate a new anon_vma.  It checks because a repetitive
   * sequence of mprotects and faults may otherwise lead to distinct
   * anon_vmas being allocated, preventing vma merge in subsequent
   * mprotect.
   */
  struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
  {
  	struct anon_vma *anon_vma = NULL;
  
  	/* Try next first. */
  	if (vma->vm_next) {
  		anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
  		if (anon_vma)
  			return anon_vma;
  	}
  
	/* Then try prev. */
  	if (vma->vm_prev)
  		anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
  	/*
  	 * We might reach here with anon_vma == NULL if we can't find
  	 * any reusable anon_vma.
  	 * There's no absolute need to look only at touching neighbours:
  	 * we could search further afield for "compatible" anon_vmas.
  	 * But it would probably just be a waste of time searching,
  	 * or lead to too many vmas hanging off the same anon_vma.
  	 * We're trying to allow mprotect remerging later on,
  	 * not trying to minimize memory used for anon_vmas.
  	 */
  	return anon_vma;
  }
  /*
 * If a hint addr is less than mmap_min_addr, change the hint to be as
 * low as possible but still greater than mmap_min_addr.
   */
  static inline unsigned long round_hint_to_min(unsigned long hint)
  {
  	hint &= PAGE_MASK;
  	if (((void *)hint != NULL) &&
  	    (hint < mmap_min_addr))
  		return PAGE_ALIGN(mmap_min_addr);
  	return hint;
  }
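
/*
 * Example (editor's sketch, not in the original source): with
 * mmap_min_addr == 0x10000, a hint of 0x4321 is first masked to 0x4000;
 * since that is non-NULL and below mmap_min_addr, the function returns
 * PAGE_ALIGN(0x10000) == 0x10000 instead. A NULL hint is passed through
 * unchanged so the allocator remains free to pick any address.
 */
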
  static inline int mlock_future_check(struct mm_struct *mm,
  				     unsigned long flags,
  				     unsigned long len)
  {
  	unsigned long locked, lock_limit;
  
  	/*  mlock MCL_FUTURE? */
  	if (flags & VM_LOCKED) {
  		locked = len >> PAGE_SHIFT;
  		locked += mm->locked_vm;
  		lock_limit = rlimit(RLIMIT_MEMLOCK);
  		lock_limit >>= PAGE_SHIFT;
  		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
  			return -EAGAIN;
  	}
  	return 0;
  }
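
/*
 * Example (editor's sketch, not in the original source): with
 * RLIMIT_MEMLOCK == 64KB and 4KB pages, lock_limit becomes 16 pages.
 * A VM_LOCKED request for 32KB (8 pages) on top of 10 already locked
 * pages gives locked == 18 > 16, so the call fails with -EAGAIN unless
 * the task has CAP_IPC_LOCK.
 */
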
  static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
  {
  	if (S_ISREG(inode->i_mode))
  		return MAX_LFS_FILESIZE;
  
  	if (S_ISBLK(inode->i_mode))
  		return MAX_LFS_FILESIZE;
  	if (S_ISSOCK(inode->i_mode))
  		return MAX_LFS_FILESIZE;
  	/* Special "we do even unsigned file positions" case */
  	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
  		return 0;
  
  	/* Yes, random drivers might want more. But I'm tired of buggy drivers */
  	return ULONG_MAX;
  }
  
  static inline bool file_mmap_ok(struct file *file, struct inode *inode,
  				unsigned long pgoff, unsigned long len)
  {
  	u64 maxsize = file_mmap_size_max(file, inode);
  
  	if (maxsize && len > maxsize)
  		return false;
  	maxsize -= len;
  	if (pgoff > maxsize >> PAGE_SHIFT)
  		return false;
  	return true;
  }
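
/*
 * Example (editor's sketch, not in the original source): for a regular
 * file, maxsize is MAX_LFS_FILESIZE. After len is subtracted, the pgoff
 * test rejects any mapping whose last byte would land past that limit,
 * catching offsets that the earlier "pgoff + len" overflow check alone
 * could not.
 */
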
/*
 * The caller must write-lock current->mm->mmap_lock.
 */
unsigned long do_mmap(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	vm_flags_t vm_flags;
	int pkey = 0;

	*populate = 0;

	if (!len)
		return -EINVAL;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we don't add PROT_EXEC.)
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && path_noexec(&file->f_path)))
			prot |= PROT_EXEC;
	/* force arch specific MAP_FIXED handling in get_unmapped_area */
	if (flags & MAP_FIXED_NOREPLACE)
		flags |= MAP_FIXED;
	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);
	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. We verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;
	if (flags & MAP_FIXED_NOREPLACE) {
		struct vm_area_struct *vma = find_vma(mm, addr);
		if (vma && vma->vm_start < addr + len)
			return -EEXIST;
	}
	if (prot == PROT_EXEC) {
		pkey = execute_only_pkey(mm);
		if (pkey < 0)
			pkey = 0;
	}
	/* Do simple checking here so the lower-level routines won't have
	 * to. We assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	if (mlock_future_check(mm, vm_flags, len))
		return -EAGAIN;

	if (file) {
		struct inode *inode = file_inode(file);
		unsigned long flags_mask;
		if (!file_mmap_ok(file, inode, pgoff, len))
			return -EOVERFLOW;
		flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;

		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/*
			 * Force use of MAP_SHARED_VALIDATE with non-legacy
			 * flags. E.g. MAP_SYNC is dangerous to use with
			 * MAP_SHARED as you don't know which consistency model
			 * you will get. We silently ignore unsupported flags
			 * with MAP_SHARED to preserve backward compatibility.
			 */
			flags &= LEGACY_MAP_MASK;
			fallthrough;
		case MAP_SHARED_VALIDATE:
			if (flags & ~flags_mask)
				return -EOPNOTSUPP;
			if (prot & PROT_WRITE) {
				if (!(file->f_mode & FMODE_WRITE))
					return -EACCES;
				if (IS_SWAPFILE(file->f_mapping->host))
					return -ETXTBSY;
			}

			/*
			 * Make sure we don't allow writing to an append-only
			 * file..
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(file))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
			fallthrough;
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (path_noexec(&file->f_path)) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op->mmap)
				return -ENODEV;
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			/*
			 * Ignore pgoff.
			 */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Set pgoff according to addr for anon_vma.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}
	/*
	 * Set 'VM_NORESERVE' if we should not account for the
	 * memory use of this mapping.
	 */
	if (flags & MAP_NORESERVE) {
		/* We honor MAP_NORESERVE if allowed to overcommit */
		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			vm_flags |= VM_NORESERVE;

		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
		if (file && is_file_hugepages(file))
			vm_flags |= VM_NORESERVE;
	}
	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
	if (!IS_ERR_VALUE(addr) &&
	    ((vm_flags & VM_LOCKED) ||
	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
		*populate = len;
	return addr;
  }
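
/*
 * Usage note (editor's sketch, not in the original source): callers such
 * as vm_mmap_pgoff() inspect the *populate out-parameter and, when it is
 * non-zero, follow up with mm_populate() so that VM_LOCKED or
 * MAP_POPULATE mappings are faulted in immediately after do_mmap()
 * returns.
 */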

unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
			      unsigned long prot, unsigned long flags,
			      unsigned long fd, unsigned long pgoff)
{
	struct file *file = NULL;
	unsigned long retval;

	if (!(flags & MAP_ANONYMOUS)) {
		audit_mmap_fd(fd, flags);
		file = fget(fd);
		if (!file)
			return -EBADF;
		if (is_file_hugepages(file)) {
			len = ALIGN(len, huge_page_size(hstate_file(file)));
		} else if (unlikely(flags & MAP_HUGETLB)) {
			retval = -EINVAL;
			goto out_fput;
		}
	} else if (flags & MAP_HUGETLB) {
		struct user_struct *user = NULL;
		struct hstate *hs;

		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (!hs)
			return -EINVAL;

		len = ALIGN(len, huge_page_size(hs));
		/*
		 * VM_NORESERVE is used because the reservations will be
		 * taken when vm_ops->mmap() is called.
		 * A dummy user value is used because we are not locking
		 * memory so no accounting is necessary.
		 */
		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
				VM_NORESERVE,
				&user, HUGETLB_ANONHUGE_INODE,
				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (IS_ERR(file))
			return PTR_ERR(file);
	}

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
out_fput:
	if (file)
		fput(file);
  	return retval;
  }
  SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
  		unsigned long, prot, unsigned long, flags,
  		unsigned long, fd, unsigned long, pgoff)
  {
  	return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
  }
  #ifdef __ARCH_WANT_SYS_OLD_MMAP
  struct mmap_arg_struct {
  	unsigned long addr;
  	unsigned long len;
  	unsigned long prot;
  	unsigned long flags;
  	unsigned long fd;
  	unsigned long offset;
  };
  
  SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
  {
  	struct mmap_arg_struct a;
  
  	if (copy_from_user(&a, arg, sizeof(a)))
  		return -EFAULT;
  	if (offset_in_page(a.offset))
  		return -EINVAL;
  	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
  			       a.offset >> PAGE_SHIFT);
  }
  #endif /* __ARCH_WANT_SYS_OLD_MMAP */
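
/*
 * Example (editor's sketch, not in the original source): old_mmap()
 * takes a byte offset rather than a page offset, so with 4KB pages an
 * offset of 0x3000 passes the offset_in_page() check and becomes
 * pgoff == 0x3000 >> PAGE_SHIFT == 3, while an unaligned offset such as
 * 0x3100 fails with -EINVAL.
 */
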
/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
	vm_flags_t vm_flags = vma->vm_flags;
	const struct vm_operations_struct *vm_ops = vma->vm_ops;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
		return 1;
	/* The open routine did something to the protections that pgprot_modify
	 * won't preserve? */
	if (pgprot_val(vm_page_prot) !=
	    pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
		return 0;
	/* Do we need to track softdirty? */
	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
		return 1;
	/* Specialty mapping? */
	if (vm_flags & VM_PFNMAP)
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_can_writeback(vma->vm_file->f_mapping);
  }
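
/*
 * Example (editor's sketch, not in the original source): a MAP_SHARED,
 * PROT_READ|PROT_WRITE mapping of a regular file whose f_mapping is
 * writeback-capable reaches the final return and yields 1, so
 * vma_set_page_prot() installs a read-only vm_page_prot and the first
 * store traps for dirty tracking; a VM_PFNMAP mapping yields 0 and
 * stays writable.
 */
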
/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
	/*
	 * hugetlb has its own accounting separate from the core VM;
	 * VM_HUGETLB may not be set yet, so we cannot check for that flag.
	 */
	if (file && is_file_hugepages(file))
		return 0;
  	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
  }
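
/*
 * Example (editor's sketch, not in the original source): a MAP_PRIVATE,
 * PROT_WRITE anonymous mapping has VM_WRITE set but neither VM_SHARED
 * nor VM_NORESERVE, so it is charged against the overcommit limits;
 * MAP_SHARED and MAP_NORESERVE mappings are not charged here.
 */
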
unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev, *merge;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;

	/* Check against address space limit. */
	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
		unsigned long nr_pages;

		/*
		 * MAP_FIXED may remove pages of mappings that intersect with
		 * the requested mapping. Account for the pages it would unmap.
		 */
		nr_pages = count_vma_pages_range(mm, addr, addr + len);
		if (!may_expand_vm(mm, vm_flags,
					(len >> PAGE_SHIFT) - nr_pages))
			return -ENOMEM;
	}
	/* Clear old maps, set up prev, rb_link, rb_parent, and uf */
	if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
		return -ENOMEM;
	/*
	 * Private writable mapping: check memory availability
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/*
	 * Can we just expand an old mapping?
	 */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
	if (vma)
		goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. The address has already been validated but
	 * not unmapped; the old maps, however, have been removed from
	 * the list.
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
		}
		if (vm_flags & VM_SHARED) {
			error = mapping_map_writable(file->f_mapping);
			if (error)
				goto allow_write_and_free_vma;
		}

		/* ->mmap() can change vma->vm_file, but must guarantee that
		 * vma_link() below can deny write-access if VM_DENYWRITE is set
		 * and map writably if VM_SHARED is set. This usually means the
		 * new file must not have been exposed to user-space, yet.
		 */
		vma->vm_file = get_file(file);
		error = call_mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;

		/* Can addr have changed??
		 *
		 * Answer: Yes, several device drivers can do it in their
		 *         f_op->mmap method. -DaveM
		 * Bug: If addr is changed, prev, rb_link, rb_parent should
		 *      be updated for vma_link()
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		addr = vma->vm_start;
		/* If vm_flags changed after call_mmap(), we should try to
		 * merge the vma again, as we may succeed this time.
		 */
		if (unlikely(vm_flags != vma->vm_flags && prev)) {
			merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX,
				vma_get_anon_name(vma));
			if (merge) {
				/* ->mmap() can change vma->vm_file and fput the original file. So
				 * fput the vma->vm_file here, or we would add an extra fput for the
				 * file and ultimately cause a general protection fault.
				 */
				fput(vma->vm_file);
				vm_area_free(vma);
				vma = merge;
				/* Update vm_flags to pick up the change. */
				vm_flags = vma->vm_flags;
				goto unmap_writable;
			}
		}
		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	} else {
		vma_set_anonymous(vma);
	}
	/* Allow architectures to sanity-check the vm_flags */
	if (!arch_validate_flags(vma->vm_flags)) {
		error = -EINVAL;
		if (file)
			goto unmap_and_free_vma;
		else
			goto free_vma;
	}
	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Once vma denies write, undo our temporary denial count */
	if (file) {
unmap_writable:
		if (vm_flags & VM_SHARED)
			mapping_unmap_writable(file->f_mapping);
		if (vm_flags & VM_DENYWRITE)
			allow_write_access(file);
	}
	file = vma->vm_file;
out:
	perf_event_mmap(vma);

	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
					is_vm_hugetlb_page(vma) ||
					vma == get_gate_vma(current->mm))
			vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
		else
			mm->locked_vm += (len >> PAGE_SHIFT);
	}

	if (file)
		uprobe_mmap(vma);

	/*
	 * A new (or expanded) vma always gets soft-dirty status.
	 * Otherwise the user-space soft-dirty page tracker would not
	 * be able to distinguish the case where a vma area is unmapped
	 * and then a new one is mapped in place (which must be treated
	 * as a completely new data area).
	 */
	vma->vm_flags |= VM_SOFTDIRTY;
	vma_set_page_prot(vma);
	return addr;

unmap_and_free_vma:
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	charged = 0;
	if (vm_flags & VM_SHARED)
		mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
	if (vm_flags & VM_DENYWRITE)
		allow_write_access(file);
free_vma:
	vm_area_free(vma);
  unacct_error:
  	if (charged)
  		vm_unacct_memory(charged);
  	return error;
  }
  static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  {
  	/*
  	 * We implement the search by looking for an rbtree node that
  	 * immediately follows a suitable gap. That is,
  	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
  	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
  	 * - gap_end - gap_start >= length
  	 */
  
  	struct mm_struct *mm = current->mm;
  	struct vm_area_struct *vma;
  	unsigned long length, low_limit, high_limit, gap_start, gap_end;
  
  	/* Adjust search length to account for worst case alignment overhead */
  	length = info->length + info->align_mask;
  	if (length < info->length)
  		return -ENOMEM;
  
  	/* Adjust search limits by the desired length */
  	if (info->high_limit < length)
  		return -ENOMEM;
  	high_limit = info->high_limit - length;
  
  	if (info->low_limit > high_limit)
  		return -ENOMEM;
  	low_limit = info->low_limit + length;
  
  	/* Check if rbtree root looks promising */
  	if (RB_EMPTY_ROOT(&mm->mm_rb))
  		goto check_highest;
  	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
  	if (vma->rb_subtree_gap < length)
  		goto check_highest;
  
  	while (true) {
  		/* Visit left subtree if it looks promising */
  		gap_end = vm_start_gap(vma);
  		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
  			struct vm_area_struct *left =
  				rb_entry(vma->vm_rb.rb_left,
  					 struct vm_area_struct, vm_rb);
  			if (left->rb_subtree_gap >= length) {
  				vma = left;
  				continue;
  			}
  		}
  		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
  check_current:
  		/* Check if current node has a suitable gap */
  		if (gap_start > high_limit)
  			return -ENOMEM;
  		if (gap_end >= low_limit &&
  		    gap_end > gap_start && gap_end - gap_start >= length)
  			goto found;
  
  		/* Visit right subtree if it looks promising */
  		if (vma->vm_rb.rb_right) {
  			struct vm_area_struct *right =
  				rb_entry(vma->vm_rb.rb_right,
  					 struct vm_area_struct, vm_rb);
  			if (right->rb_subtree_gap >= length) {
  				vma = right;
  				continue;
  			}
  		}
  
  		/* Go back up the rbtree to find next candidate node */
  		while (true) {
  			struct rb_node *prev = &vma->vm_rb;
  			if (!rb_parent(prev))
  				goto check_highest;
  			vma = rb_entry(rb_parent(prev),
  				       struct vm_area_struct, vm_rb);
  			if (prev == vma->vm_rb.rb_left) {
  				gap_start = vm_end_gap(vma->vm_prev);
  				gap_end = vm_start_gap(vma);
  				goto check_current;
  			}
  		}
  	}
  
  check_highest:
  	/* Check highest gap, which does not precede any rbtree node */
  	gap_start = mm->highest_vm_end;
  	gap_end = ULONG_MAX;  /* Only for VM_BUG_ON below */
  	if (gap_start > high_limit)
  		return -ENOMEM;
  
  found:
  	/* We found a suitable gap. Clip it with the original low_limit. */
  	if (gap_start < info->low_limit)
  		gap_start = info->low_limit;
  
  	/* Adjust gap address to the desired alignment */
  	gap_start += (info->align_offset - gap_start) & info->align_mask;
  
  	VM_BUG_ON(gap_start + info->length > info->high_limit);
  	VM_BUG_ON(gap_start + info->length > gap_end);
  	return gap_start;
  }
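
/*
 * Example (editor's sketch, not in the original source): if the root's
 * rb_subtree_gap is already smaller than the requested length, the whole
 * tree is skipped and only the gap above mm->highest_vm_end is tried;
 * otherwise the walk descends left-first, so the lowest suitable gap is
 * returned, matching bottom-up allocation.
 */
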
  static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
  {
  	struct mm_struct *mm = current->mm;
  	struct vm_area_struct *vma;
  	unsigned long length, low_limit, high_limit, gap_start, gap_end;
  
  	/* Adjust search length to account for worst case alignment overhead */
  	length = info->length + info->align_mask;
  	if (length < info->length)
  		return -ENOMEM;
  
  	/*
  	 * Adjust search limits by the desired length.
  	 * See implementation comment at top of unmapped_area().
  	 */
  	gap_end = info->high_limit;
  	if (gap_end < length)
  		return -ENOMEM;
  	high_limit = gap_end - length;
  
  	if (info->low_limit > high_limit)
  		return -ENOMEM;
  	low_limit = info->low_limit + length;
  
  	/* Check highest gap, which does not precede any rbtree node */
  	gap_start = mm->highest_vm_end;
  	if (gap_start <= high_limit)
  		goto found_highest;
  
  	/* Check if rbtree root looks promising */
  	if (RB_EMPTY_ROOT(&mm->mm_rb))
  		return -ENOMEM;
  	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
  	if (vma->rb_subtree_gap < length)
  		return -ENOMEM;
  
  	while (true) {
  		/* Visit right subtree if it looks promising */
  		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
  		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
  			struct vm_area_struct *right =
  				rb_entry(vma->vm_rb.rb_right,
  					 struct vm_area_struct, vm_rb);
  			if (right->rb_subtree_gap >= length) {
  				vma = right;
  				continue;
  			}
  		}
  
  check_current:
  		/* Check if current node has a suitable gap */
  		gap_end = vm_start_gap(vma);
  		if (gap_end < low_limit)
  			return -ENOMEM;
  		if (gap_start <= high_limit &&
  		    gap_end > gap_start && gap_end - gap_start >= length)
  			goto found;
  
  		/* Visit left subtree if it looks promising */
  		if (vma->vm_rb.rb_left) {
  			struct vm_area_struct *left =
  				rb_entry(vma->vm_rb.rb_left,
  					 struct vm_area_struct, vm_rb);
  			if (left->rb_subtree_gap >= length) {
  				vma = left;
  				continue;
  			}
  		}
  
  		/* Go back up the rbtree to find next candidate node */
  		while (true) {
  			struct rb_node *prev = &vma->vm_rb;
  			if (!rb_parent(prev))
  				return -ENOMEM;
  			vma = rb_entry(rb_parent(prev),
  				       struct vm_area_struct, vm_rb);
  			if (prev == vma->vm_rb.rb_right) {
  				gap_start = vma->vm_prev ?
  					vm_end_gap(vma->vm_prev) : 0;
  				goto check_current;
  			}
  		}
  	}
  
  found:
  	/* We found a suitable gap. Clip it with the original high_limit. */
  	if (gap_end > info->high_limit)
  		gap_end = info->high_limit;
  
  found_highest:
  	/* Compute highest gap address at the desired alignment */
  	gap_end -= info->length;
  	gap_end -= (gap_end - info->align_offset) & info->align_mask;
  
  	VM_BUG_ON(gap_end < info->low_limit);
  	VM_BUG_ON(gap_end < gap_start);
  	return gap_end;
  }
  /*
   * Search for an unmapped address range.
   *
   * We are looking for a range that:
   * - does not intersect with any VMA;
   * - is contained within the [low_limit, high_limit) interval;
 * - is at least the desired size;
 * - satisfies (begin_addr & align_mask) == (align_offset & align_mask).
   */
  unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
  {
  	unsigned long addr;
  	if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
  		addr = unmapped_area_topdown(info);
  	else
  		addr = unmapped_area(info);
  
  	trace_vm_unmapped_area(addr, info);
  	return addr;
  }
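
/*
 * Usage sketch (editor's example, not in the original source; the
 * HPAGE_PMD_SIZE alignment is just an assumed illustration). A
 * bottom-up, PMD-aligned search could be set up as:
 *
 *	struct vm_unmapped_area_info info;
 *
 *	info.flags = 0;
 *	info.length = len;
 *	info.low_limit = mm->mmap_base;
 *	info.high_limit = TASK_SIZE;
 *	info.align_mask = HPAGE_PMD_SIZE - 1;
 *	info.align_offset = 0;
 *	addr = vm_unmapped_area(&info);
 *
 * The returned address then satisfies
 * (addr & info.align_mask) == (info.align_offset & info.align_mask).
 */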
  
  #ifndef arch_get_mmap_end
  #define arch_get_mmap_end(addr)	(TASK_SIZE)
  #endif
  
  #ifndef arch_get_mmap_base
  #define arch_get_mmap_base(addr, base) (base)
  #endif
  /* Get an address range which is currently unmapped.
   * For shmat() with addr=0.
   *
   * Ugly calling convention alert:
 * A return value with the low bits set means an error value, i.e.
   *	if (ret & ~PAGE_MASK)
   *		error = ret;
   *
   * This function "knows" that -ENOMEM has the bits set.
   */
  #ifndef HAVE_ARCH_UNMAPPED_AREA
  unsigned long
  arch_get_unmapped_area(struct file *filp, unsigned long addr,
  		unsigned long len, unsigned long pgoff, unsigned long flags)
  {
  	struct mm_struct *mm = current->mm;
  	struct vm_area_struct *vma, *prev;
  	struct vm_unmapped_area_info info;
  	const unsigned long mmap_end = arch_get_mmap_end(addr);

  	if (len > mmap_end - mmap_min_addr)
  		return -ENOMEM;
  	if (flags & MAP_FIXED)
  		return addr;
  	if (addr) {
  		addr = PAGE_ALIGN(addr);
  		vma = find_vma_prev(mm, addr, &prev);
  		if (mmap_end - len >= addr && addr >= mmap_min_addr &&
  		    (!vma || addr + len <= vm_start_gap(vma)) &&
  		    (!prev || addr >= vm_end_gap(prev)))
  			return addr;
  	}

  	info.flags = 0;
  	info.length = len;
  	info.low_limit = mm->mmap_base;
  	info.high_limit = mmap_end;
  	info.align_mask = 0;
  	info.align_offset = 0;
  	return vm_unmapped_area(&info);
  }
  #endif

  /*
   * This mmap-allocator allocates new areas top-down from below the
   * stack's low limit (the base):
   */
  #ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
  unsigned long
  arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
  			  unsigned long len, unsigned long pgoff,
  			  unsigned long flags)
  {
  	struct vm_area_struct *vma, *prev;
  	struct mm_struct *mm = current->mm;
  	struct vm_unmapped_area_info info;
  	const unsigned long mmap_end = arch_get_mmap_end(addr);
  
  	/* requested length too big for entire address space */
  	if (len > mmap_end - mmap_min_addr)
  		return -ENOMEM;
  	if (flags & MAP_FIXED)
  		return addr;
  	/* requesting a specific address */
  	if (addr) {
  		addr = PAGE_ALIGN(addr);
1be7107fb   Hugh Dickins   mm: larger stack ...
2146
  		vma = find_vma_prev(mm, addr, &prev);
f6795053d   Steve Capper   mm: mmap: Allow f...
2147
  		if (mmap_end - len >= addr && addr >= mmap_min_addr &&
1be7107fb   Hugh Dickins   mm: larger stack ...
2148
2149
  				(!vma || addr + len <= vm_start_gap(vma)) &&
  				(!prev || addr >= vm_end_gap(prev)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2150
2151
  			return addr;
  	}
db4fbfb95   Michel Lespinasse   mm: vm_unmapped_a...
2152
2153
  	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
  	info.length = len;
2afc745f3   Akira Takeuchi   mm: ensure get_un...
2154
  	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
f6795053d   Steve Capper   mm: mmap: Allow f...
2155
  	info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
db4fbfb95   Michel Lespinasse   mm: vm_unmapped_a...
2156
  	info.align_mask = 0;
09ef5283f   Jaewon Kim   mm/mmap.c: initia...
2157
  	info.align_offset = 0;
db4fbfb95   Michel Lespinasse   mm: vm_unmapped_a...
2158
  	addr = vm_unmapped_area(&info);
b716ad953   Xiao Guangrong   mm: search from f...
2159

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2160
2161
2162
2163
2164
2165
  	/*
  	 * A failed mmap() very likely causes application failure,
  	 * so fall back to the bottom-up function here. This scenario
  	 * can happen with large stack limits and large mmap()
  	 * allocations.
  	 */
de1741a13   Alexander Kuleshov   mm/mmap: use offs...
2166
  	if (offset_in_page(addr)) {
db4fbfb95   Michel Lespinasse   mm: vm_unmapped_a...
2167
2168
2169
  		VM_BUG_ON(addr != -ENOMEM);
  		info.flags = 0;
  		info.low_limit = TASK_UNMAPPED_BASE;
f6795053d   Steve Capper   mm: mmap: Allow f...
2170
  		info.high_limit = mmap_end;
db4fbfb95   Michel Lespinasse   mm: vm_unmapped_a...
2171
2172
  		addr = vm_unmapped_area(&info);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2173
2174
2175
2176
  
  	return addr;
  }
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2177
2178
2179
2180
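/*
 * Worked example of the hint check above (illustrative values, 4 KiB
 * pages assumed): a hint too close to the top of the address space is
 * rejected and the allocator walks the gaps top-down instead; only if
 * that also fails does it retry bottom-up from TASK_UNMAPPED_BASE.
 *
 *	unsigned long mmap_end = 0x7ffffffff000UL;
 *	unsigned long len      = 0x2000UL;
 *	unsigned long addr     = 0x7fffffffe000UL;	(the hint)
 *	bool hint_ok = (mmap_end - len >= addr);	-- false: hint ignored
 */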
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long,
				  unsigned long, unsigned long, unsigned long);

	unsigned long error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/* Careful about overflows.. */
	if (len > TASK_SIZE)
		return -ENOMEM;

	get_area = current->mm->get_unmapped_area;
	if (file) {
		if (file->f_op->get_unmapped_area)
			get_area = file->f_op->get_unmapped_area;
	} else if (flags & MAP_SHARED) {
		/*
		 * mmap_region() will call shmem_zero_setup() to create a file,
		 * so use shmem's get_unmapped_area in case it can be huge.
		 * do_mmap() will clear pgoff, so match alignment.
		 */
		pgoff = 0;
		get_area = shmem_get_unmapped_area;
	}

	addr = get_area(file, addr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;

	if (addr > TASK_SIZE - len)
		return -ENOMEM;
	if (offset_in_page(addr))
		return -EINVAL;

	error = security_mmap_addr(addr);
	return error ? error : addr;
}

EXPORT_SYMBOL(get_unmapped_area);
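/*
 * Illustrative sketch, not part of this file: a driver can steer the
 * placement of its mappings through the (real) f_op hook consulted
 * above.  example_get_unmapped_area() and example_fops are hypothetical.
 *
 *	static unsigned long example_get_unmapped_area(struct file *file,
 *			unsigned long addr, unsigned long len,
 *			unsigned long pgoff, unsigned long flags)
 *	{
 *		-- apply device placement rules, then fall back to the
 *		-- mm's default allocator:
 *		return current->mm->get_unmapped_area(file, addr, len,
 *						      pgoff, flags);
 *	}
 *
 *	static const struct file_operations example_fops = {
 *		.get_unmapped_area = example_get_unmapped_area,
 *	};
 */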
  
/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
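/*
 * Usage sketch (hypothetical caller): find_vma() only guarantees
 * addr < vma->vm_end, so a caller that needs the address to lie inside
 * the vma must also check vm_start, and must hold mmap_lock:
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma->vm_start <= addr)
 *		-- addr is mapped by vma
 *	mmap_read_unlock(mm);
 */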
/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */
struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
			struct vm_area_struct **pprev)
{
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (vma) {
		*pprev = vma->vm_prev;
	} else {
		struct rb_node *rb_node = rb_last(&mm->mm_rb);

		*pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
	}
	return vma;
}
  
/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */
static int acct_stack_growth(struct vm_area_struct *vma,
			     unsigned long size, unsigned long grow)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long new_start;

	/* address space limit tests */
	if (!may_expand_vm(mm, vma->vm_flags, grow))
		return -ENOMEM;

	/* Stack limit test */
	if (size > rlimit(RLIMIT_STACK))
		return -ENOMEM;

	/* mlock limit tests */
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked;
		unsigned long limit;
		locked = mm->locked_vm + grow;
		limit = rlimit(RLIMIT_MEMLOCK);
		limit >>= PAGE_SHIFT;
		if (locked > limit && !capable(CAP_IPC_LOCK))
			return -ENOMEM;
	}

	/* Check to ensure the stack will not grow into a hugetlb-only region */
	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
			vma->vm_end - size;
	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
		return -EFAULT;

	/*
	 * Overcommit..  This must be the final test, as it will
	 * update security statistics.
	 */
	if (security_vm_enough_memory_mm(mm, grow))
		return -ENOMEM;

	return 0;
}
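/*
 * Worked example of the mlock test above (illustrative numbers): with
 * RLIMIT_MEMLOCK at its common 64 KiB default and 4 KiB pages, a
 * VM_LOCKED stack that already accounts locked_vm = 12 pages may grow
 * by at most 4 more pages unless the task has CAP_IPC_LOCK:
 *
 *	limit  = 65536 >> 12;	-- 16 pages
 *	locked = 12 + grow;	-- must stay <= 16
 */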
#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next;
	unsigned long gap_addr;
	int error = 0;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* Guard against exceeding limits of the address space. */
	address &= PAGE_MASK;
	if (address >= (TASK_SIZE & PAGE_MASK))
		return -ENOMEM;
	address += PAGE_SIZE;

	/* Enforce stack_guard_gap */
	gap_addr = address + stack_guard_gap;

	/* Guard against overflow */
	if (gap_addr < address || gap_addr > TASK_SIZE)
		gap_addr = TASK_SIZE;
	next = vma->vm_next;
	if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
		if (!(next->vm_flags & VM_GROWSUP))
			return -ENOMEM;
		/* Check that both stack segments have the same anon_vma? */
	}

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_lock in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address > vma->vm_end) {
		unsigned long size, grow;

		size = address - vma->vm_start;
		grow = (address - vma->vm_end) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_lock
				 * here, so we need to protect against
				 * concurrent vma expansions.
				 * anon_vma_lock_write() doesn't help here, as
				 * we don't guarantee that all growable vmas
				 * in a mm share the same root anon vma.
				 * So, we reuse mm->page_table_lock to guard
				 * against concurrent vma expansions.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_end = address;
				anon_vma_interval_tree_post_update_vma(vma);
				if (vma->vm_next)
					vma_gap_update(vma->vm_next);
				else
					mm->highest_vm_end = vm_end_gap(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */
int expand_downwards(struct vm_area_struct *vma,
				   unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *prev;
	int error = 0;

	address &= PAGE_MASK;
	if (address < mmap_min_addr)
		return -EPERM;

	/* Enforce stack_guard_gap */
	prev = vma->vm_prev;
	/* Check that both stack segments have the same anon_vma? */
	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
			vma_is_accessible(prev)) {
		if (address - prev->vm_end < stack_guard_gap)
			return -ENOMEM;
	}

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_lock in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address < vma->vm_start) {
		unsigned long size, grow;

		size = vma->vm_end - address;
		grow = (vma->vm_start - address) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (grow <= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_lock
				 * here, so we need to protect against
				 * concurrent vma expansions.
				 * anon_vma_lock_write() doesn't help here, as
				 * we don't guarantee that all growable vmas
				 * in a mm share the same root anon vma.
				 * So, we reuse mm->page_table_lock to guard
				 * against concurrent vma expansions.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_start = address;
				vma->vm_pgoff -= grow;
				anon_vma_interval_tree_post_update_vma(vma);
				vma_gap_update(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}
  /* enforced gap between the expanding stack and other mappings. */
  unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
  
  static int __init cmdline_parse_stack_guard_gap(char *p)
  {
  	unsigned long val;
  	char *endptr;
  
  	val = simple_strtoul(p, &endptr, 10);
  	if (!*endptr)
  		stack_guard_gap = val << PAGE_SHIFT;
  
  	return 0;
  }
  __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
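/*
 * Illustrative note: the value is parsed in pages, so on a 4 KiB-page
 * kernel booted with "stack_guard_gap=1024" the enforced gap becomes
 *
 *	1024 << PAGE_SHIFT == 1024 * 4096 == 4 MiB
 *
 * and the default of 256 pages above corresponds to 1 MiB.
 */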
#ifdef CONFIG_STACK_GROWSUP
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_upwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma, *prev;

	addr &= PAGE_MASK;
	vma = find_vma_prev(mm, addr, &prev);
	if (vma && (vma->vm_start <= addr))
		return vma;
	/* don't alter vm_end if the coredump is running */
	if (!prev || expand_stack(prev, addr))
		return NULL;
	if (prev->vm_flags & VM_LOCKED)
		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
	return prev;
}
#else
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm, addr);
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	start = vma->vm_start;
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED)
		populate_vma_page_range(vma, addr, start, NULL);
	return vma;
}
#endif

EXPORT_SYMBOL_GPL(find_extend_vma);
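/*
 * Usage sketch (hypothetical caller): fault-style paths such as GUP use
 * this helper to transparently grow a stack vma that the target address
 * falls just outside of, under the read-held mmap_lock:
 *
 *	mmap_read_lock(mm);
 *	vma = find_extend_vma(mm, addr);
 *	if (!vma)
 *		-- addr is unmapped and no stack vma could be grown
 *	mmap_read_unlock(mm);
 */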
/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
	unsigned long nr_accounted = 0;

	/* Update high watermark before we lower total_vm */
	update_hiwater_vm(mm);
	do {
		long nrpages = vma_pages(vma);

		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += nrpages;
		vm_stat_account(mm, vma->vm_flags, -nrpages);
		vma = remove_vma(vma);
	} while (vma);
	vm_unacct_memory(nr_accounted);
	validate_mm(mm);
}
  
/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *next = vma_next(mm, prev);
	struct mmu_gather tlb;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, start, end);
	update_hiwater_rss(mm);
	unmap_vmas(&tlb, vma, start, end);
	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
				 next ? next->vm_start : USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, start, end);
}
  
/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go..
 */
static bool
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, unsigned long end)
{
	struct vm_area_struct **insertion_point;
	struct vm_area_struct *tail_vma = NULL;

	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
	vma->vm_prev = NULL;
	do {
		vma_rb_erase(vma, &mm->mm_rb);
		mm->map_count--;
		tail_vma = vma;
		vma = vma->vm_next;
	} while (vma && vma->vm_start < end);
	*insertion_point = vma;
	if (vma) {
		vma->vm_prev = prev;
		vma_gap_update(vma);
	} else
		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
	tail_vma->vm_next = NULL;

	/* Kill the cache */
	vmacache_invalidate(mm);

	/*
	 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
	 * VM_GROWSUP VMA. Such VMAs can change their size under
	 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
	 */
	if (vma && (vma->vm_flags & VM_GROWSDOWN))
		return false;
	if (prev && (prev->vm_flags & VM_GROWSUP))
		return false;
	return true;
}
  
/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 */
int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, int new_below)
{
	struct vm_area_struct *new;
	int err;

	if (vma->vm_ops && vma->vm_ops->split) {
		err = vma->vm_ops->split(vma, addr);
		if (err)
			return err;
	}

	new = vm_area_dup(vma);
	if (!new)
		return -ENOMEM;

	if (new_below)
		new->vm_end = addr;
	else {
		new->vm_start = addr;
		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
	}

	err = vma_dup_policy(vma, new);
	if (err)
		goto out_free_vma;

	err = anon_vma_clone(new, vma);
	if (err)
		goto out_free_mpol;

	if (new->vm_file)
		get_file(new->vm_file);

	if (new->vm_ops && new->vm_ops->open)
		new->vm_ops->open(new);

	if (new_below)
		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
			((addr - new->vm_start) >> PAGE_SHIFT), new);
	else
		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

	/* Success. */
	if (!err)
		return 0;

	/* Clean everything up if vma_adjust failed. */
	if (new->vm_ops && new->vm_ops->close)
		new->vm_ops->close(new);
	if (new->vm_file)
		fput(new->vm_file);
	unlink_anon_vmas(new);
 out_free_mpol:
	mpol_put(vma_policy(new));
 out_free_vma:
	vm_area_free(new);
	return err;
}
  /*
   * Split a vma into two pieces at address 'addr', a new vma is allocated
   * either for the first part or the tail.
   */
  int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  	      unsigned long addr, int new_below)
  {
  	if (mm->map_count >= sysctl_max_map_count)
  		return -ENOMEM;
  
  	return __split_vma(mm, vma, addr, new_below);
  }
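/*
 * Usage sketch (hypothetical caller, mmap_lock held for write): callers
 * such as mprotect-style paths split before changing attributes on a
 * subrange [start, end) of an existing vma:
 *
 *	if (start > vma->vm_start)
 *		error = split_vma(mm, vma, start, 1);	-- new vma takes the head
 *	if (!error && end < vma->vm_end)
 *		error = split_vma(mm, vma, end, 0);	-- new vma takes the tail
 */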
/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@goop.org>
 */
int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
		struct list_head *uf, bool downgrade)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (len == 0)
		return -EINVAL;

	/*
	 * arch_unmap() might do unmaps itself.  It must be called
	 * and finish any rbtree manipulation before this code
	 * runs and also starts to manipulate the rbtree.
	 */
	arch_unmap(mm, start, end);

	/* Find the first overlapping VMA */
	vma = find_vma(mm, start);
	if (!vma)
		return 0;
	prev = vma->vm_prev;
	/* we have  start < vma->vm_end  */

	/* if it doesn't overlap, we have nothing.. */
	if (vma->vm_start >= end)
		return 0;

	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
	if (start > vma->vm_start) {
		int error;

		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */
		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
			return -ENOMEM;

		error = __split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	/* Does it split the last one? */
	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
		int error = __split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	vma = vma_next(mm, prev);

	if (unlikely(uf)) {
		/*
		 * If userfaultfd_unmap_prep returns an error the vmas
		 * will remain split, but userland will get a
		 * highly unexpected error anyway. This is no
		 * different than the case where the first of the two
		 * __split_vma fails, but we don't undo the first
		 * split, though we could. This is unlikely enough
		 * failure that it's not worth optimizing it for.
		 */
		int error = userfaultfd_unmap_prep(vma, start, end, uf);
		if (error)
			return error;
	}

	/*
	 * unlock any mlock()ed ranges before detaching vmas
	 */
	if (mm->locked_vm) {
		struct vm_area_struct *tmp = vma;
		while (tmp && tmp->vm_start < end) {
			if (tmp->vm_flags & VM_LOCKED) {
				mm->locked_vm -= vma_pages(tmp);
				munlock_vma_pages_all(tmp);
			}

			tmp = tmp->vm_next;
		}
	}

	/* Detach vmas from rbtree */
	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
		downgrade = false;

	if (downgrade)
		mmap_write_downgrade(mm);

	unmap_region(mm, vma, prev, start, end);

	/* Fix up all other VM information */
	remove_vma_list(mm, vma);

	return downgrade ? 1 : 0;
}
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
	      struct list_head *uf)
{
	return __do_munmap(mm, start, len, uf, false);
}

static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
{
	int ret;
	struct mm_struct *mm = current->mm;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __do_munmap(mm, start, len, &uf, downgrade);
	/*
	 * Returning 1 indicates mmap_lock is downgraded.
	 * But 1 is not a legal return value of vm_munmap() and munmap(), so
	 * reset it to 0 before return.
	 */
	if (ret == 1) {
		mmap_read_unlock(mm);
		ret = 0;
	} else
		mmap_write_unlock(mm);

	userfaultfd_unmap_complete(mm, &uf);
	return ret;
}

int vm_munmap(unsigned long start, size_t len)
{
	return __vm_munmap(start, len, false);
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	addr = untagged_addr(addr);
	profile_munmap(addr);
	return __vm_munmap(addr, len, true);
}
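/*
 * Usage sketch (hypothetical caller): kernel code that mapped a region
 * into current->mm, e.g. via the real helper vm_mmap(), tears it down
 * with vm_munmap(), which takes and releases mmap_lock itself:
 *
 *	addr = vm_mmap(file, 0, size, PROT_READ, MAP_PRIVATE, 0);
 *	if (!IS_ERR_VALUE(addr))
 *		vm_munmap(addr, size);
 */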
/*
 * Emulation of deprecated remap_file_pages() syscall.
 */
SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
{

	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long populate = 0;
	unsigned long ret = -EINVAL;
	struct file *file;

	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
		     current->comm, current->pid);

	if (prot)
		return ret;
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	if (start + size <= start)
		return ret;

	/* Does pgoff wrap? */
	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
		return ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	vma = find_vma(mm, start);

	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (start < vma->vm_start)
		goto out;

	if (start + size > vma->vm_end) {
		struct vm_area_struct *next;

		for (next = vma->vm_next; next; next = next->vm_next) {
			/* hole between vmas? */
			if (next->vm_start != next->vm_prev->vm_end)
				goto out;

			if (next->vm_file != vma->vm_file)
				goto out;

			if (next->vm_flags != vma->vm_flags)
				goto out;

			if (start + size <= next->vm_end)
				break;
		}

		if (!next)
			goto out;
	}

	prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
	prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
	prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;

	flags &= MAP_NONBLOCK;
	flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
	if (vma->vm_flags & VM_LOCKED) {
		struct vm_area_struct *tmp;
		flags |= MAP_LOCKED;

		/* drop PG_Mlocked flag for over-mapped range */
		for (tmp = vma; tmp->vm_start >= start + size;
				tmp = tmp->vm_next) {
			/*
			 * Split pmd and munlock page on the border
			 * of the range.
			 */
			vma_adjust_trans_huge(tmp, start, start + size, 0);

			munlock_vma_pages_range(tmp,
					max(tmp->vm_start, start),
					min(tmp->vm_end, start + size));
		}
	}

	file = get_file(vma->vm_file);
	ret = do_mmap(vma->vm_file, start, size,
			prot, flags, pgoff, &populate, NULL);
	fput(file);
out:
	mmap_write_unlock(mm);
	if (populate)
		mm_populate(ret, populate);
	if (!IS_ERR_VALUE(ret))
		ret = 0;
	return ret;
}
/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	pgoff_t pgoff = addr >> PAGE_SHIFT;
	int error;
	unsigned long mapped_addr;

	/* Until we need other flags, refuse anything except VM_EXEC. */
	if ((flags & (~VM_EXEC)) != 0)
		return -EINVAL;
	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

	mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
	if (IS_ERR_VALUE(mapped_addr))
		return mapped_addr;

	error = mlock_future_check(mm, mm->def_flags, len);
	if (error)
		return error;

	/* Clear old maps, set up prev, rb_link, rb_parent, and uf */
	if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
		return -ENOMEM;

	/* Check against address space limits *after* clearing old maps... */
	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	/* Can we just expand an old private anonymous mapping? */
	vma = vma_merge(mm, prev, addr, addr + len, flags,
			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
	if (vma)
		goto out;

	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		vm_unacct_memory(len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	vma_set_anonymous(vma);
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;
	vma->vm_flags = flags;
	vma->vm_page_prot = vm_get_page_prot(flags);
	vma_link(mm, vma, prev, rb_link, rb_parent);
out:
	perf_event_mmap(vma);
	mm->total_vm += len >> PAGE_SHIFT;
	mm->data_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED)
		mm->locked_vm += (len >> PAGE_SHIFT);
	vma->vm_flags |= VM_SOFTDIRTY;
	return 0;
}
int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	unsigned long len;
	int ret;
	bool populate;
	LIST_HEAD(uf);

	len = PAGE_ALIGN(request);
	if (len < request)
		return -ENOMEM;
	if (!len)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = do_brk_flags(addr, len, flags, &uf);
	populate = ((mm->def_flags & VM_LOCKED) != 0);
	mmap_write_unlock(mm);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate && !ret)
		mm_populate(addr, len);
	return ret;
}
EXPORT_SYMBOL(vm_brk_flags);

int vm_brk(unsigned long addr, unsigned long len)
{
	return vm_brk_flags(addr, len, 0);
}
EXPORT_SYMBOL(vm_brk);
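/*
 * Usage sketch (hypothetical caller): binary loaders use vm_brk() and
 * vm_brk_flags() to set up zero-filled anonymous regions such as bss,
 * mapping [start, start + len) with the default protections:
 *
 *	error = vm_brk(start, len);
 *	if (error)
 *		-- loader bails out with error
 */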
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;

	/* mm's last user has gone, and it's about to be pulled down */
	mmu_notifier_release(mm);

	if (unlikely(mm_is_oom_victim(mm))) {
		/*
		 * Manually reap the mm to free as much memory as possible.
		 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
		 * this mm from further consideration.  Taking mm->mmap_lock for
		 * write after setting MMF_OOM_SKIP will guarantee that the oom
		 * reaper will not run on this mm again after mmap_lock is
		 * dropped.
		 *
		 * Nothing can be holding mm->mmap_lock here and the above call
		 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
		 * __oom_reap_task_mm() will not block.
		 *
		 * This needs to be done before calling munlock_vma_pages_all(),
		 * which clears VM_LOCKED, otherwise the oom reaper cannot
		 * reliably test it.
		 */
		(void)__oom_reap_task_mm(mm);

		set_bit(MMF_OOM_SKIP, &mm->flags);
		mmap_write_lock(mm);
		mmap_write_unlock(mm);
	}

	if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
			if (vma->vm_flags & VM_LOCKED)
				munlock_vma_pages_all(vma);
			vma = vma->vm_next;
		}
	}

	arch_exit_mmap(mm);

	vma = mm->mmap;
	if (!vma)	/* Can happen if dup_mmap() received an OOM */
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb_gather_mmu(&tlb, mm, 0, -1);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
	unmap_vmas(&tlb, vma, 0, -1);
	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, 0, -1);

	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
	while (vma) {
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma = remove_vma(vma);
		cond_resched();
	}
	vm_unacct_memory(nr_accounted);
}
  
/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_rwsem is taken here.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap and in do_brk_flags.
	 */
	if (vma_is_anonymous(vma)) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}
  
  /*
   * Copy the vma structure to a new location in the same mm,
   * prior to moving page table entries, to effect an mremap move.
   */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;	/* should never get here */
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			    vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
			VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
			*vmap = vma = new_vma;
		}
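		/*
		 * Rmap walks visit vmas in pgoff order.  If the pages are
		 * not moving to a strictly higher pgoff, a concurrent rmap
		 * walk could otherwise miss them while they are in flight,
		 * so move_ptes() must take the rmap locks in that case.
		 */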
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = vm_area_dup(vma);
		if (!new_vma)
			goto out;
		new_vma->vm_start = addr;
		new_vma->vm_end = addr + len;
		new_vma->vm_pgoff = pgoff;
		if (vma_dup_policy(vma, new_vma))
			goto out_free_vma;
		if (anon_vma_clone(new_vma, vma))
			goto out_free_mempol;
		if (new_vma->vm_file)
			get_file(new_vma->vm_file);
		if (new_vma->vm_ops && new_vma->vm_ops->open)
			new_vma->vm_ops->open(new_vma);
		vma_link(mm, new_vma, prev, rb_link, rb_parent);
		*need_rmap_locks = false;
	}
	return new_vma;

out_free_mempol:
	mpol_put(vma_policy(new_vma));
out_free_vma:
	vm_area_free(new_vma);
out:
	return NULL;
}

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages.
 */
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
		return false;

	if (is_data_mapping(flags) &&
	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
		/* Workaround for Valgrind */
		if (rlimit(RLIMIT_DATA) == 0 &&
		    mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
			return true;

		pr_warn_once("%s (%d): VmData %lu exceeds data ulimit %lu. Update limits%s.\n",
			     current->comm, current->pid,
			     (mm->data_vm + npages) << PAGE_SHIFT,
			     rlimit(RLIMIT_DATA),
			     ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");

		if (!ignore_rlimit_data)
			return false;
	}

	return true;
}
  
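/*
 * Account @npages of address space in the mm's counters: always in the
 * total, plus in the exec/stack/data counter selected by @flags.  Callers
 * pass a negative @npages to undo the accounting when unmapping.
 */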
void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
{
	mm->total_vm += npages;
	if (is_exec_mapping(flags))
		mm->exec_vm += npages;
	else if (is_stack_mapping(flags))
		mm->stack_vm += npages;
	else if (is_data_mapping(flags))
		mm->data_vm += npages;
}

static vm_fault_t special_mapping_fault(struct vm_fault *vmf);

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const char *special_mapping_name(struct vm_area_struct *vma)
{
	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}
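
/*
 * The vma's ->mremap() hook: after checking that the mapping is only
 * being moved within the current task's own mm, forward the event to
 * the owner's ->mremap() callback (e.g. so vDSO code can track the
 * mapping's new address).
 */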
static int special_mapping_mremap(struct vm_area_struct *new_vma)
{
	struct vm_special_mapping *sm = new_vma->vm_private_data;

	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
		return -EFAULT;

	if (sm->mremap)
		return sm->mremap(sm, new_vma);

	return 0;
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
	.mremap = special_mapping_mremap,
	.name = special_mapping_name,
	/* vDSO code relies on VVAR pages not being accessible remotely */
	.access = NULL,
};

static const struct vm_operations_struct legacy_special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};
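
/*
 * Fault handler shared by both flavours of special mapping: delegate to
 * the owner's ->fault() hook when one is provided, otherwise look the
 * faulting page up in the NULL-terminated pages array and return it
 * with a reference held.
 */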
static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgoff_t pgoff;
	struct page **pages;

	if (vma->vm_ops == &legacy_special_mapping_vmops) {
		pages = vma->vm_private_data;
	} else {
		struct vm_special_mapping *sm = vma->vm_private_data;

		if (sm->fault)
			return sm->fault(sm, vmf->vma, vmf);

		pages = sm->pages;
	}

	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}
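
/*
 * Common helper for the two flavours below: allocate a vma covering
 * [addr, addr + len), mark it VM_DONTEXPAND, wire up @ops and @priv,
 * insert it into @mm and account it.  Returns the new vma on success,
 * or an ERR_PTR() on failure.
 */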
static struct vm_area_struct *__install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, void *priv,
	const struct vm_operations_struct *ops)
{
	int ret;
	struct vm_area_struct *vma;

	vma = vm_area_alloc(mm);
	if (unlikely(vma == NULL))
		return ERR_PTR(-ENOMEM);

	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = ops;
	vma->vm_private_data = priv;

	ret = insert_vm_struct(mm, vma);
	if (ret)
		goto out;

	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);

	perf_event_mmap(vma);

	return vma;

out:
	vm_area_free(vma);
	return ERR_PTR(ret);
}
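
/*
 * Return true if @vma is a special mapping that was installed with @sm
 * as its vm_special_mapping descriptor.
 */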
  bool vma_is_special_mapping(const struct vm_area_struct *vma,
  	const struct vm_special_mapping *sm)
  {
  	return vma->vm_private_data == sm &&
  		(vma->vm_ops == &special_mapping_vmops ||
  		 vma->vm_ops == &legacy_special_mapping_vmops);
  }

/*
 * Called with mm->mmap_lock held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
struct vm_area_struct *_install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, const struct vm_special_mapping *spec)
{
	return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
					&special_mapping_vmops);
}
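
/*
 * Illustrative sketch (not taken from this file): an architecture's vDSO
 * setup code typically calls this as
 *
 *	static const struct vm_special_mapping vdso_mapping = {
 *		.name  = "[vdso]",
 *		.pages = vdso_pages,
 *	};
 *
 *	vma = _install_special_mapping(mm, addr, len,
 *				       VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
 *				       &vdso_mapping);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * where vdso_mapping and vdso_pages are hypothetical names; vdso_pages
 * stands for a NULL-terminated array of the vDSO's pages.
 */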

int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	struct vm_area_struct *vma = __install_special_mapping(
		mm, addr, len, vm_flags, (void *)pages,
		&legacy_special_mapping_vmops);

	return PTR_ERR_OR_ZERO(vma);
}

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need for atomic instructions here: head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
	}
}
  
/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_lock in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_lock until mm_drop_all_locks() returns.
 *
 * mmap_lock in write mode is required in order to block all operations
 * that could modify pagetables and free pages without needing to
 * alter the vma layout. It's also needed in write mode to prevent new
 * anon_vmas from being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We take locks in the following order, according to the comment at the
 * beginning of mm/rmap.c:
 *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
 *     hugetlb mappings);
 *   - all i_mmap_rwsem locks;
 *   - all anon_vma->rwsem locks.
 *
 * We can take all locks within these types randomly because the VM code
 * doesn't nest them and we are protected from parallel mm_take_all_locks()
 * by mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(mmap_read_trylock(mm));

	mutex_lock(&mm_all_locks_mutex);
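
	/*
	 * Three passes over the vma list, following the lock order
	 * documented above: hugetlb i_mmap_rwsem locks first, then the
	 * remaining i_mmap_rwsem locks, then all anon_vma locks.  Each
	 * pass bails out if a signal is pending.
	 */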
  
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				!is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
  
static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma, so that users of the anon_vma->rb_root
		 * never see our bitflag.
		 *
		 * No need for atomic instructions here: head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}
  
static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		i_mmap_unlock_write(mapping);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}
  
/*
 * The mmap_lock cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(mmap_read_trylock(mm));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}
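
/*
 * Illustrative sketch (not taken from this file) of how the pair is
 * meant to be used:
 *
 *	mmap_write_lock(mm);
 *	if (!mm_take_all_locks(mm)) {
 *		... modify rmap/mmu-notifier state for every vma ...
 *		mm_drop_all_locks(mm);
 *	}
 *	mmap_write_unlock(mm);
 *
 * mmu_notifier_register() in mm/mmu_notifier.c is the classic caller
 * of this pattern.
 */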

/*
 * initialise the percpu counter for VM
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
  
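	/* free_kbytes / 32 is ~3% of free memory; 1UL << 17 kbytes is 128MB */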
  	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
  	return 0;
  }
  subsys_initcall(init_user_reserve);

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will
 * only reserve 3% of free pages by default.
 */
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
  
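	/* free_kbytes / 32 is ~3% of free memory; 1UL << 13 kbytes is 8MB */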
  	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
  	return 0;
  }
  subsys_initcall(init_admin_reserve);
  
  /*
 * Reinitialise user and admin reserves if memory is added or removed.
   *
   * The default user reserve max is 128MB, and the default max for the
   * admin reserve is 8MB. These are usually, but not always, enough to
   * enable recovery from a memory hogging process using login/sshd, a shell,
   * and tools like top. It may make sense to increase or even disable the
   * reserve depending on the existence of swap or variations in the recovery
   * tools. So, the admin may have changed them.
   *
   * If memory is added and the reserves have been eliminated or increased above
   * the default max, then we'll trust the admin.
   *
   * If memory is removed and there isn't enough free memory, then we
   * need to reset the reserves.
   *
   * Otherwise keep the reserve set by the admin.
   */
  static int reserve_mem_notifier(struct notifier_block *nb,
  			     unsigned long action, void *data)
  {
  	unsigned long tmp, free_kbytes;
  
  	switch (action) {
  	case MEM_ONLINE:
  		/* Default max is 128MB. Leave alone if modified by operator. */
  		tmp = sysctl_user_reserve_kbytes;
  		if (0 < tmp && tmp < (1UL << 17))
  			init_user_reserve();
  
  		/* Default max is 8MB.  Leave alone if modified by operator. */
  		tmp = sysctl_admin_reserve_kbytes;
  		if (0 < tmp && tmp < (1UL << 13))
  			init_admin_reserve();
  
  		break;
  	case MEM_OFFLINE:
		free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
  
		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}
  
		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
  		break;
  	default:
  		break;
  	}
  	return NOTIFY_OK;
  }
  
  static struct notifier_block reserve_mem_nb = {
  	.notifier_call = reserve_mem_notifier,
  };
  
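/*
 * Register for memory hotplug events so the user/admin reserves are
 * rescaled when memory is added or removed.
 */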
static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
  subsys_initcall(init_reserve_notifier);