Commit 8c9ed899b44c19e81859fbb0e9d659fe2f8630fc

Authored by David Howells
Committed by Linus Torvalds
Parent: ee7fee0b91

NOMMU: Don't check vm_region::vm_start is page aligned in add_nommu_region()

Don't check vm_region::vm_start is page aligned in add_nommu_region() because
the region may reflect some non-page-aligned mapped file, such as could be
obtained from RomFS XIP.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 1 changed file with 0 additions and 2 deletions
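For quick reference, the entire change is the removal of a single assertion (and the blank line after it) from add_nommu_region(). The excerpt below is reconstructed from the full diff that follows, with "-" marking the deleted lines:

 static void add_nommu_region(struct vm_region *region)
 {
 	struct vm_region *pregion;
 	struct rb_node **p, *parent;

 	validate_nommu_regions();

-	BUG_ON(region->vm_start & ~PAGE_MASK);
-
 	parent = NULL;
 	p = &nommu_region_tree.rb_node;

As the commit message notes, a vm_region's start address is not necessarily page aligned when it reflects a non-page-aligned mapped file (for example, one mapped directly out of a RomFS image via XIP), so inserting such a region into the tree must not trigger a BUG.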

1 /* 1 /*
2 * linux/mm/nommu.c 2 * linux/mm/nommu.c
3 * 3 *
4 * Replacement code for mm functions to support CPU's that don't 4 * Replacement code for mm functions to support CPU's that don't
5 * have any form of memory management unit (thus no virtual memory). 5 * have any form of memory management unit (thus no virtual memory).
6 * 6 *
7 * See Documentation/nommu-mmap.txt 7 * See Documentation/nommu-mmap.txt
8 * 8 *
9 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> 9 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
13 * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org> 13 * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org>
14 */ 14 */
15 15
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/mm.h> 17 #include <linux/mm.h>
18 #include <linux/mman.h> 18 #include <linux/mman.h>
19 #include <linux/swap.h> 19 #include <linux/swap.h>
20 #include <linux/file.h> 20 #include <linux/file.h>
21 #include <linux/highmem.h> 21 #include <linux/highmem.h>
22 #include <linux/pagemap.h> 22 #include <linux/pagemap.h>
23 #include <linux/slab.h> 23 #include <linux/slab.h>
24 #include <linux/vmalloc.h> 24 #include <linux/vmalloc.h>
25 #include <linux/tracehook.h> 25 #include <linux/tracehook.h>
26 #include <linux/blkdev.h> 26 #include <linux/blkdev.h>
27 #include <linux/backing-dev.h> 27 #include <linux/backing-dev.h>
28 #include <linux/mount.h> 28 #include <linux/mount.h>
29 #include <linux/personality.h> 29 #include <linux/personality.h>
30 #include <linux/security.h> 30 #include <linux/security.h>
31 #include <linux/syscalls.h> 31 #include <linux/syscalls.h>
32 32
33 #include <asm/uaccess.h> 33 #include <asm/uaccess.h>
34 #include <asm/tlb.h> 34 #include <asm/tlb.h>
35 #include <asm/tlbflush.h> 35 #include <asm/tlbflush.h>
36 #include "internal.h" 36 #include "internal.h"
37 37
38 static inline __attribute__((format(printf, 1, 2))) 38 static inline __attribute__((format(printf, 1, 2)))
39 void no_printk(const char *fmt, ...) 39 void no_printk(const char *fmt, ...)
40 { 40 {
41 } 41 }
42 42
43 #if 0 43 #if 0
44 #define kenter(FMT, ...) \ 44 #define kenter(FMT, ...) \
45 printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) 45 printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
46 #define kleave(FMT, ...) \ 46 #define kleave(FMT, ...) \
47 printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) 47 printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
48 #define kdebug(FMT, ...) \ 48 #define kdebug(FMT, ...) \
49 printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) 49 printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
50 #else 50 #else
51 #define kenter(FMT, ...) \ 51 #define kenter(FMT, ...) \
52 no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) 52 no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
53 #define kleave(FMT, ...) \ 53 #define kleave(FMT, ...) \
54 no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) 54 no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
55 #define kdebug(FMT, ...) \ 55 #define kdebug(FMT, ...) \
56 no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) 56 no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
57 #endif 57 #endif
58 58
59 #include "internal.h" 59 #include "internal.h"
60 60
61 void *high_memory; 61 void *high_memory;
62 struct page *mem_map; 62 struct page *mem_map;
63 unsigned long max_mapnr; 63 unsigned long max_mapnr;
64 unsigned long num_physpages; 64 unsigned long num_physpages;
65 struct percpu_counter vm_committed_as; 65 struct percpu_counter vm_committed_as;
66 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ 66 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
67 int sysctl_overcommit_ratio = 50; /* default is 50% */ 67 int sysctl_overcommit_ratio = 50; /* default is 50% */
68 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 68 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
69 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; 69 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
70 int heap_stack_gap = 0; 70 int heap_stack_gap = 0;
71 71
72 atomic_long_t mmap_pages_allocated; 72 atomic_long_t mmap_pages_allocated;
73 73
74 EXPORT_SYMBOL(mem_map); 74 EXPORT_SYMBOL(mem_map);
75 EXPORT_SYMBOL(num_physpages); 75 EXPORT_SYMBOL(num_physpages);
76 76
77 /* list of mapped, potentially shareable regions */ 77 /* list of mapped, potentially shareable regions */
78 static struct kmem_cache *vm_region_jar; 78 static struct kmem_cache *vm_region_jar;
79 struct rb_root nommu_region_tree = RB_ROOT; 79 struct rb_root nommu_region_tree = RB_ROOT;
80 DECLARE_RWSEM(nommu_region_sem); 80 DECLARE_RWSEM(nommu_region_sem);
81 81
82 struct vm_operations_struct generic_file_vm_ops = { 82 struct vm_operations_struct generic_file_vm_ops = {
83 }; 83 };
84 84
85 /* 85 /*
86 * Handle all mappings that got truncated by a "truncate()" 86 * Handle all mappings that got truncated by a "truncate()"
87 * system call. 87 * system call.
88 * 88 *
89 * NOTE! We have to be ready to update the memory sharing 89 * NOTE! We have to be ready to update the memory sharing
90 * between the file and the memory map for a potential last 90 * between the file and the memory map for a potential last
91 * incomplete page. Ugly, but necessary. 91 * incomplete page. Ugly, but necessary.
92 */ 92 */
93 int vmtruncate(struct inode *inode, loff_t offset) 93 int vmtruncate(struct inode *inode, loff_t offset)
94 { 94 {
95 struct address_space *mapping = inode->i_mapping; 95 struct address_space *mapping = inode->i_mapping;
96 unsigned long limit; 96 unsigned long limit;
97 97
98 if (inode->i_size < offset) 98 if (inode->i_size < offset)
99 goto do_expand; 99 goto do_expand;
100 i_size_write(inode, offset); 100 i_size_write(inode, offset);
101 101
102 truncate_inode_pages(mapping, offset); 102 truncate_inode_pages(mapping, offset);
103 goto out_truncate; 103 goto out_truncate;
104 104
105 do_expand: 105 do_expand:
106 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 106 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
107 if (limit != RLIM_INFINITY && offset > limit) 107 if (limit != RLIM_INFINITY && offset > limit)
108 goto out_sig; 108 goto out_sig;
109 if (offset > inode->i_sb->s_maxbytes) 109 if (offset > inode->i_sb->s_maxbytes)
110 goto out; 110 goto out;
111 i_size_write(inode, offset); 111 i_size_write(inode, offset);
112 112
113 out_truncate: 113 out_truncate:
114 if (inode->i_op->truncate) 114 if (inode->i_op->truncate)
115 inode->i_op->truncate(inode); 115 inode->i_op->truncate(inode);
116 return 0; 116 return 0;
117 out_sig: 117 out_sig:
118 send_sig(SIGXFSZ, current, 0); 118 send_sig(SIGXFSZ, current, 0);
119 out: 119 out:
120 return -EFBIG; 120 return -EFBIG;
121 } 121 }
122 122
123 EXPORT_SYMBOL(vmtruncate); 123 EXPORT_SYMBOL(vmtruncate);
124 124
125 /* 125 /*
126 * Return the total memory allocated for this pointer, not 126 * Return the total memory allocated for this pointer, not
127 * just what the caller asked for. 127 * just what the caller asked for.
128 * 128 *
129 * Doesn't have to be accurate, i.e. may have races. 129 * Doesn't have to be accurate, i.e. may have races.
130 */ 130 */
131 unsigned int kobjsize(const void *objp) 131 unsigned int kobjsize(const void *objp)
132 { 132 {
133 struct page *page; 133 struct page *page;
134 134
135 /* 135 /*
136 * If the object we have should not have ksize performed on it, 136 * If the object we have should not have ksize performed on it,
137 * return size of 0 137 * return size of 0
138 */ 138 */
139 if (!objp || !virt_addr_valid(objp)) 139 if (!objp || !virt_addr_valid(objp))
140 return 0; 140 return 0;
141 141
142 page = virt_to_head_page(objp); 142 page = virt_to_head_page(objp);
143 143
144 /* 144 /*
145 * If the allocator sets PageSlab, we know the pointer came from 145 * If the allocator sets PageSlab, we know the pointer came from
146 * kmalloc(). 146 * kmalloc().
147 */ 147 */
148 if (PageSlab(page)) 148 if (PageSlab(page))
149 return ksize(objp); 149 return ksize(objp);
150 150
151 /* 151 /*
152 * If it's not a compound page, see if we have a matching VMA 152 * If it's not a compound page, see if we have a matching VMA
153 * region. This test is intentionally done in reverse order, 153 * region. This test is intentionally done in reverse order,
154 * so if there's no VMA, we still fall through and hand back 154 * so if there's no VMA, we still fall through and hand back
155 * PAGE_SIZE for 0-order pages. 155 * PAGE_SIZE for 0-order pages.
156 */ 156 */
157 if (!PageCompound(page)) { 157 if (!PageCompound(page)) {
158 struct vm_area_struct *vma; 158 struct vm_area_struct *vma;
159 159
160 vma = find_vma(current->mm, (unsigned long)objp); 160 vma = find_vma(current->mm, (unsigned long)objp);
161 if (vma) 161 if (vma)
162 return vma->vm_end - vma->vm_start; 162 return vma->vm_end - vma->vm_start;
163 } 163 }
164 164
165 /* 165 /*
166 * The ksize() function is only guaranteed to work for pointers 166 * The ksize() function is only guaranteed to work for pointers
167 * returned by kmalloc(). So handle arbitrary pointers here. 167 * returned by kmalloc(). So handle arbitrary pointers here.
168 */ 168 */
169 return PAGE_SIZE << compound_order(page); 169 return PAGE_SIZE << compound_order(page);
170 } 170 }
171 171
172 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 172 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
173 unsigned long start, int len, int flags, 173 unsigned long start, int len, int flags,
174 struct page **pages, struct vm_area_struct **vmas) 174 struct page **pages, struct vm_area_struct **vmas)
175 { 175 {
176 struct vm_area_struct *vma; 176 struct vm_area_struct *vma;
177 unsigned long vm_flags; 177 unsigned long vm_flags;
178 int i; 178 int i;
179 int write = !!(flags & GUP_FLAGS_WRITE); 179 int write = !!(flags & GUP_FLAGS_WRITE);
180 int force = !!(flags & GUP_FLAGS_FORCE); 180 int force = !!(flags & GUP_FLAGS_FORCE);
181 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); 181 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
182 182
183 /* calculate required read or write permissions. 183 /* calculate required read or write permissions.
184 * - if 'force' is set, we only require the "MAY" flags. 184 * - if 'force' is set, we only require the "MAY" flags.
185 */ 185 */
186 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 186 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
187 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 187 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
188 188
189 for (i = 0; i < len; i++) { 189 for (i = 0; i < len; i++) {
190 vma = find_vma(mm, start); 190 vma = find_vma(mm, start);
191 if (!vma) 191 if (!vma)
192 goto finish_or_fault; 192 goto finish_or_fault;
193 193
194 /* protect what we can, including chardevs */ 194 /* protect what we can, including chardevs */
195 if (vma->vm_flags & (VM_IO | VM_PFNMAP) || 195 if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
196 (!ignore && !(vm_flags & vma->vm_flags))) 196 (!ignore && !(vm_flags & vma->vm_flags)))
197 goto finish_or_fault; 197 goto finish_or_fault;
198 198
199 if (pages) { 199 if (pages) {
200 pages[i] = virt_to_page(start); 200 pages[i] = virt_to_page(start);
201 if (pages[i]) 201 if (pages[i])
202 page_cache_get(pages[i]); 202 page_cache_get(pages[i]);
203 } 203 }
204 if (vmas) 204 if (vmas)
205 vmas[i] = vma; 205 vmas[i] = vma;
206 start += PAGE_SIZE; 206 start += PAGE_SIZE;
207 } 207 }
208 208
209 return i; 209 return i;
210 210
211 finish_or_fault: 211 finish_or_fault:
212 return i ? : -EFAULT; 212 return i ? : -EFAULT;
213 } 213 }
214 214
215 215
216 /* 216 /*
217 * get a list of pages in an address range belonging to the specified process 217 * get a list of pages in an address range belonging to the specified process
218 * and indicate the VMA that covers each page 218 * and indicate the VMA that covers each page
219 * - this is potentially dodgy as we may end incrementing the page count of a 219 * - this is potentially dodgy as we may end incrementing the page count of a
220 * slab page or a secondary page from a compound page 220 * slab page or a secondary page from a compound page
221 * - don't permit access to VMAs that don't support it, such as I/O mappings 221 * - don't permit access to VMAs that don't support it, such as I/O mappings
222 */ 222 */
223 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 223 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
224 unsigned long start, int len, int write, int force, 224 unsigned long start, int len, int write, int force,
225 struct page **pages, struct vm_area_struct **vmas) 225 struct page **pages, struct vm_area_struct **vmas)
226 { 226 {
227 int flags = 0; 227 int flags = 0;
228 228
229 if (write) 229 if (write)
230 flags |= GUP_FLAGS_WRITE; 230 flags |= GUP_FLAGS_WRITE;
231 if (force) 231 if (force)
232 flags |= GUP_FLAGS_FORCE; 232 flags |= GUP_FLAGS_FORCE;
233 233
234 return __get_user_pages(tsk, mm, 234 return __get_user_pages(tsk, mm,
235 start, len, flags, 235 start, len, flags,
236 pages, vmas); 236 pages, vmas);
237 } 237 }
238 EXPORT_SYMBOL(get_user_pages); 238 EXPORT_SYMBOL(get_user_pages);
239 239
240 DEFINE_RWLOCK(vmlist_lock); 240 DEFINE_RWLOCK(vmlist_lock);
241 struct vm_struct *vmlist; 241 struct vm_struct *vmlist;
242 242
243 void vfree(const void *addr) 243 void vfree(const void *addr)
244 { 244 {
245 kfree(addr); 245 kfree(addr);
246 } 246 }
247 EXPORT_SYMBOL(vfree); 247 EXPORT_SYMBOL(vfree);
248 248
249 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 249 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
250 { 250 {
251 /* 251 /*
252 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() 252 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
253 * returns only a logical address. 253 * returns only a logical address.
254 */ 254 */
255 return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 255 return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
256 } 256 }
257 EXPORT_SYMBOL(__vmalloc); 257 EXPORT_SYMBOL(__vmalloc);
258 258
259 void *vmalloc_user(unsigned long size) 259 void *vmalloc_user(unsigned long size)
260 { 260 {
261 void *ret; 261 void *ret;
262 262
263 ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 263 ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
264 PAGE_KERNEL); 264 PAGE_KERNEL);
265 if (ret) { 265 if (ret) {
266 struct vm_area_struct *vma; 266 struct vm_area_struct *vma;
267 267
268 down_write(&current->mm->mmap_sem); 268 down_write(&current->mm->mmap_sem);
269 vma = find_vma(current->mm, (unsigned long)ret); 269 vma = find_vma(current->mm, (unsigned long)ret);
270 if (vma) 270 if (vma)
271 vma->vm_flags |= VM_USERMAP; 271 vma->vm_flags |= VM_USERMAP;
272 up_write(&current->mm->mmap_sem); 272 up_write(&current->mm->mmap_sem);
273 } 273 }
274 274
275 return ret; 275 return ret;
276 } 276 }
277 EXPORT_SYMBOL(vmalloc_user); 277 EXPORT_SYMBOL(vmalloc_user);
278 278
279 struct page *vmalloc_to_page(const void *addr) 279 struct page *vmalloc_to_page(const void *addr)
280 { 280 {
281 return virt_to_page(addr); 281 return virt_to_page(addr);
282 } 282 }
283 EXPORT_SYMBOL(vmalloc_to_page); 283 EXPORT_SYMBOL(vmalloc_to_page);
284 284
285 unsigned long vmalloc_to_pfn(const void *addr) 285 unsigned long vmalloc_to_pfn(const void *addr)
286 { 286 {
287 return page_to_pfn(virt_to_page(addr)); 287 return page_to_pfn(virt_to_page(addr));
288 } 288 }
289 EXPORT_SYMBOL(vmalloc_to_pfn); 289 EXPORT_SYMBOL(vmalloc_to_pfn);
290 290
291 long vread(char *buf, char *addr, unsigned long count) 291 long vread(char *buf, char *addr, unsigned long count)
292 { 292 {
293 memcpy(buf, addr, count); 293 memcpy(buf, addr, count);
294 return count; 294 return count;
295 } 295 }
296 296
297 long vwrite(char *buf, char *addr, unsigned long count) 297 long vwrite(char *buf, char *addr, unsigned long count)
298 { 298 {
299 /* Don't allow overflow */ 299 /* Don't allow overflow */
300 if ((unsigned long) addr + count < count) 300 if ((unsigned long) addr + count < count)
301 count = -(unsigned long) addr; 301 count = -(unsigned long) addr;
302 302
303 memcpy(addr, buf, count); 303 memcpy(addr, buf, count);
304 return(count); 304 return(count);
305 } 305 }
306 306
307 /* 307 /*
308 * vmalloc - allocate virtually continguos memory 308 * vmalloc - allocate virtually continguos memory
309 * 309 *
310 * @size: allocation size 310 * @size: allocation size
311 * 311 *
312 * Allocate enough pages to cover @size from the page level 312 * Allocate enough pages to cover @size from the page level
313 * allocator and map them into continguos kernel virtual space. 313 * allocator and map them into continguos kernel virtual space.
314 * 314 *
315 * For tight control over page level allocator and protection flags 315 * For tight control over page level allocator and protection flags
316 * use __vmalloc() instead. 316 * use __vmalloc() instead.
317 */ 317 */
318 void *vmalloc(unsigned long size) 318 void *vmalloc(unsigned long size)
319 { 319 {
320 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 320 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
321 } 321 }
322 EXPORT_SYMBOL(vmalloc); 322 EXPORT_SYMBOL(vmalloc);
323 323
324 void *vmalloc_node(unsigned long size, int node) 324 void *vmalloc_node(unsigned long size, int node)
325 { 325 {
326 return vmalloc(size); 326 return vmalloc(size);
327 } 327 }
328 EXPORT_SYMBOL(vmalloc_node); 328 EXPORT_SYMBOL(vmalloc_node);
329 329
330 #ifndef PAGE_KERNEL_EXEC 330 #ifndef PAGE_KERNEL_EXEC
331 # define PAGE_KERNEL_EXEC PAGE_KERNEL 331 # define PAGE_KERNEL_EXEC PAGE_KERNEL
332 #endif 332 #endif
333 333
334 /** 334 /**
335 * vmalloc_exec - allocate virtually contiguous, executable memory 335 * vmalloc_exec - allocate virtually contiguous, executable memory
336 * @size: allocation size 336 * @size: allocation size
337 * 337 *
338 * Kernel-internal function to allocate enough pages to cover @size 338 * Kernel-internal function to allocate enough pages to cover @size
339 * the page level allocator and map them into contiguous and 339 * the page level allocator and map them into contiguous and
340 * executable kernel virtual space. 340 * executable kernel virtual space.
341 * 341 *
342 * For tight control over page level allocator and protection flags 342 * For tight control over page level allocator and protection flags
343 * use __vmalloc() instead. 343 * use __vmalloc() instead.
344 */ 344 */
345 345
346 void *vmalloc_exec(unsigned long size) 346 void *vmalloc_exec(unsigned long size)
347 { 347 {
348 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); 348 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
349 } 349 }
350 350
351 /** 351 /**
352 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 352 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
353 * @size: allocation size 353 * @size: allocation size
354 * 354 *
355 * Allocate enough 32bit PA addressable pages to cover @size from the 355 * Allocate enough 32bit PA addressable pages to cover @size from the
356 * page level allocator and map them into continguos kernel virtual space. 356 * page level allocator and map them into continguos kernel virtual space.
357 */ 357 */
358 void *vmalloc_32(unsigned long size) 358 void *vmalloc_32(unsigned long size)
359 { 359 {
360 return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 360 return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
361 } 361 }
362 EXPORT_SYMBOL(vmalloc_32); 362 EXPORT_SYMBOL(vmalloc_32);
363 363
364 /** 364 /**
365 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory 365 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
366 * @size: allocation size 366 * @size: allocation size
367 * 367 *
368 * The resulting memory area is 32bit addressable and zeroed so it can be 368 * The resulting memory area is 32bit addressable and zeroed so it can be
369 * mapped to userspace without leaking data. 369 * mapped to userspace without leaking data.
370 * 370 *
371 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to 371 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
372 * remap_vmalloc_range() are permissible. 372 * remap_vmalloc_range() are permissible.
373 */ 373 */
374 void *vmalloc_32_user(unsigned long size) 374 void *vmalloc_32_user(unsigned long size)
375 { 375 {
376 /* 376 /*
377 * We'll have to sort out the ZONE_DMA bits for 64-bit, 377 * We'll have to sort out the ZONE_DMA bits for 64-bit,
378 * but for now this can simply use vmalloc_user() directly. 378 * but for now this can simply use vmalloc_user() directly.
379 */ 379 */
380 return vmalloc_user(size); 380 return vmalloc_user(size);
381 } 381 }
382 EXPORT_SYMBOL(vmalloc_32_user); 382 EXPORT_SYMBOL(vmalloc_32_user);
383 383
384 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) 384 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
385 { 385 {
386 BUG(); 386 BUG();
387 return NULL; 387 return NULL;
388 } 388 }
389 EXPORT_SYMBOL(vmap); 389 EXPORT_SYMBOL(vmap);
390 390
391 void vunmap(const void *addr) 391 void vunmap(const void *addr)
392 { 392 {
393 BUG(); 393 BUG();
394 } 394 }
395 EXPORT_SYMBOL(vunmap); 395 EXPORT_SYMBOL(vunmap);
396 396
397 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) 397 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
398 { 398 {
399 BUG(); 399 BUG();
400 return NULL; 400 return NULL;
401 } 401 }
402 EXPORT_SYMBOL(vm_map_ram); 402 EXPORT_SYMBOL(vm_map_ram);
403 403
404 void vm_unmap_ram(const void *mem, unsigned int count) 404 void vm_unmap_ram(const void *mem, unsigned int count)
405 { 405 {
406 BUG(); 406 BUG();
407 } 407 }
408 EXPORT_SYMBOL(vm_unmap_ram); 408 EXPORT_SYMBOL(vm_unmap_ram);
409 409
410 void vm_unmap_aliases(void) 410 void vm_unmap_aliases(void)
411 { 411 {
412 } 412 }
413 EXPORT_SYMBOL_GPL(vm_unmap_aliases); 413 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
414 414
415 /* 415 /*
416 * Implement a stub for vmalloc_sync_all() if the architecture chose not to 416 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
417 * have one. 417 * have one.
418 */ 418 */
419 void __attribute__((weak)) vmalloc_sync_all(void) 419 void __attribute__((weak)) vmalloc_sync_all(void)
420 { 420 {
421 } 421 }
422 422
423 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 423 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
424 struct page *page) 424 struct page *page)
425 { 425 {
426 return -EINVAL; 426 return -EINVAL;
427 } 427 }
428 EXPORT_SYMBOL(vm_insert_page); 428 EXPORT_SYMBOL(vm_insert_page);
429 429
430 /* 430 /*
431 * sys_brk() for the most part doesn't need the global kernel 431 * sys_brk() for the most part doesn't need the global kernel
432 * lock, except when an application is doing something nasty 432 * lock, except when an application is doing something nasty
433 * like trying to un-brk an area that has already been mapped 433 * like trying to un-brk an area that has already been mapped
434 * to a regular file. in this case, the unmapping will need 434 * to a regular file. in this case, the unmapping will need
435 * to invoke file system routines that need the global lock. 435 * to invoke file system routines that need the global lock.
436 */ 436 */
437 SYSCALL_DEFINE1(brk, unsigned long, brk) 437 SYSCALL_DEFINE1(brk, unsigned long, brk)
438 { 438 {
439 struct mm_struct *mm = current->mm; 439 struct mm_struct *mm = current->mm;
440 440
441 if (brk < mm->start_brk || brk > mm->context.end_brk) 441 if (brk < mm->start_brk || brk > mm->context.end_brk)
442 return mm->brk; 442 return mm->brk;
443 443
444 if (mm->brk == brk) 444 if (mm->brk == brk)
445 return mm->brk; 445 return mm->brk;
446 446
447 /* 447 /*
448 * Always allow shrinking brk 448 * Always allow shrinking brk
449 */ 449 */
450 if (brk <= mm->brk) { 450 if (brk <= mm->brk) {
451 mm->brk = brk; 451 mm->brk = brk;
452 return brk; 452 return brk;
453 } 453 }
454 454
455 /* 455 /*
456 * Ok, looks good - let it rip. 456 * Ok, looks good - let it rip.
457 */ 457 */
458 return mm->brk = brk; 458 return mm->brk = brk;
459 } 459 }
460 460
461 /* 461 /*
462 * initialise the VMA and region record slabs 462 * initialise the VMA and region record slabs
463 */ 463 */
464 void __init mmap_init(void) 464 void __init mmap_init(void)
465 { 465 {
466 int ret; 466 int ret;
467 467
468 ret = percpu_counter_init(&vm_committed_as, 0); 468 ret = percpu_counter_init(&vm_committed_as, 0);
469 VM_BUG_ON(ret); 469 VM_BUG_ON(ret);
470 vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); 470 vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
471 } 471 }
472 472
473 /* 473 /*
474 * validate the region tree 474 * validate the region tree
475 * - the caller must hold the region lock 475 * - the caller must hold the region lock
476 */ 476 */
477 #ifdef CONFIG_DEBUG_NOMMU_REGIONS 477 #ifdef CONFIG_DEBUG_NOMMU_REGIONS
478 static noinline void validate_nommu_regions(void) 478 static noinline void validate_nommu_regions(void)
479 { 479 {
480 struct vm_region *region, *last; 480 struct vm_region *region, *last;
481 struct rb_node *p, *lastp; 481 struct rb_node *p, *lastp;
482 482
483 lastp = rb_first(&nommu_region_tree); 483 lastp = rb_first(&nommu_region_tree);
484 if (!lastp) 484 if (!lastp)
485 return; 485 return;
486 486
487 last = rb_entry(lastp, struct vm_region, vm_rb); 487 last = rb_entry(lastp, struct vm_region, vm_rb);
488 BUG_ON(unlikely(last->vm_end <= last->vm_start)); 488 BUG_ON(unlikely(last->vm_end <= last->vm_start));
489 BUG_ON(unlikely(last->vm_top < last->vm_end)); 489 BUG_ON(unlikely(last->vm_top < last->vm_end));
490 490
491 while ((p = rb_next(lastp))) { 491 while ((p = rb_next(lastp))) {
492 region = rb_entry(p, struct vm_region, vm_rb); 492 region = rb_entry(p, struct vm_region, vm_rb);
493 last = rb_entry(lastp, struct vm_region, vm_rb); 493 last = rb_entry(lastp, struct vm_region, vm_rb);
494 494
495 BUG_ON(unlikely(region->vm_end <= region->vm_start)); 495 BUG_ON(unlikely(region->vm_end <= region->vm_start));
496 BUG_ON(unlikely(region->vm_top < region->vm_end)); 496 BUG_ON(unlikely(region->vm_top < region->vm_end));
497 BUG_ON(unlikely(region->vm_start < last->vm_top)); 497 BUG_ON(unlikely(region->vm_start < last->vm_top));
498 498
499 lastp = p; 499 lastp = p;
500 } 500 }
501 } 501 }
502 #else 502 #else
503 static void validate_nommu_regions(void) 503 static void validate_nommu_regions(void)
504 { 504 {
505 } 505 }
506 #endif 506 #endif
507 507
508 /* 508 /*
509 * add a region into the global tree 509 * add a region into the global tree
510 */ 510 */
511 static void add_nommu_region(struct vm_region *region) 511 static void add_nommu_region(struct vm_region *region)
512 { 512 {
513 struct vm_region *pregion; 513 struct vm_region *pregion;
514 struct rb_node **p, *parent; 514 struct rb_node **p, *parent;
515 515
516 validate_nommu_regions(); 516 validate_nommu_regions();
517 517
518 BUG_ON(region->vm_start & ~PAGE_MASK);	(deleted by this commit)
519 	(deleted by this commit)
520 parent = NULL; 518 parent = NULL;
521 p = &nommu_region_tree.rb_node; 519 p = &nommu_region_tree.rb_node;
522 while (*p) { 520 while (*p) {
523 parent = *p; 521 parent = *p;
524 pregion = rb_entry(parent, struct vm_region, vm_rb); 522 pregion = rb_entry(parent, struct vm_region, vm_rb);
525 if (region->vm_start < pregion->vm_start) 523 if (region->vm_start < pregion->vm_start)
526 p = &(*p)->rb_left; 524 p = &(*p)->rb_left;
527 else if (region->vm_start > pregion->vm_start) 525 else if (region->vm_start > pregion->vm_start)
528 p = &(*p)->rb_right; 526 p = &(*p)->rb_right;
529 else if (pregion == region) 527 else if (pregion == region)
530 return; 528 return;
531 else 529 else
532 BUG(); 530 BUG();
533 } 531 }
534 532
535 rb_link_node(&region->vm_rb, parent, p); 533 rb_link_node(&region->vm_rb, parent, p);
536 rb_insert_color(&region->vm_rb, &nommu_region_tree); 534 rb_insert_color(&region->vm_rb, &nommu_region_tree);
537 535
538 validate_nommu_regions(); 536 validate_nommu_regions();
539 } 537 }
540 538
541 /* 539 /*
542 * delete a region from the global tree 540 * delete a region from the global tree
543 */ 541 */
544 static void delete_nommu_region(struct vm_region *region) 542 static void delete_nommu_region(struct vm_region *region)
545 { 543 {
546 BUG_ON(!nommu_region_tree.rb_node); 544 BUG_ON(!nommu_region_tree.rb_node);
547 545
548 validate_nommu_regions(); 546 validate_nommu_regions();
549 rb_erase(&region->vm_rb, &nommu_region_tree); 547 rb_erase(&region->vm_rb, &nommu_region_tree);
550 validate_nommu_regions(); 548 validate_nommu_regions();
551 } 549 }
552 550
553 /* 551 /*
554 * free a contiguous series of pages 552 * free a contiguous series of pages
555 */ 553 */
556 static void free_page_series(unsigned long from, unsigned long to) 554 static void free_page_series(unsigned long from, unsigned long to)
557 { 555 {
558 for (; from < to; from += PAGE_SIZE) { 556 for (; from < to; from += PAGE_SIZE) {
559 struct page *page = virt_to_page(from); 557 struct page *page = virt_to_page(from);
560 558
561 kdebug("- free %lx", from); 559 kdebug("- free %lx", from);
562 atomic_long_dec(&mmap_pages_allocated); 560 atomic_long_dec(&mmap_pages_allocated);
563 if (page_count(page) != 1) 561 if (page_count(page) != 1)
564 kdebug("free page %p: refcount not one: %d", 562 kdebug("free page %p: refcount not one: %d",
565 page, page_count(page)); 563 page, page_count(page));
566 put_page(page); 564 put_page(page);
567 } 565 }
568 } 566 }
569 567
570 /* 568 /*
571 * release a reference to a region 569 * release a reference to a region
572 * - the caller must hold the region semaphore for writing, which this releases 570 * - the caller must hold the region semaphore for writing, which this releases
573 * - the region may not have been added to the tree yet, in which case vm_top 571 * - the region may not have been added to the tree yet, in which case vm_top
574 * will equal vm_start 572 * will equal vm_start
575 */ 573 */
576 static void __put_nommu_region(struct vm_region *region) 574 static void __put_nommu_region(struct vm_region *region)
577 __releases(nommu_region_sem) 575 __releases(nommu_region_sem)
578 { 576 {
579 kenter("%p{%d}", region, atomic_read(&region->vm_usage)); 577 kenter("%p{%d}", region, atomic_read(&region->vm_usage));
580 578
581 BUG_ON(!nommu_region_tree.rb_node); 579 BUG_ON(!nommu_region_tree.rb_node);
582 580
583 if (atomic_dec_and_test(&region->vm_usage)) { 581 if (atomic_dec_and_test(&region->vm_usage)) {
584 if (region->vm_top > region->vm_start) 582 if (region->vm_top > region->vm_start)
585 delete_nommu_region(region); 583 delete_nommu_region(region);
586 up_write(&nommu_region_sem); 584 up_write(&nommu_region_sem);
587 585
588 if (region->vm_file) 586 if (region->vm_file)
589 fput(region->vm_file); 587 fput(region->vm_file);
590 588
591 /* IO memory and memory shared directly out of the pagecache 589 /* IO memory and memory shared directly out of the pagecache
592 * from ramfs/tmpfs mustn't be released here */ 590 * from ramfs/tmpfs mustn't be released here */
593 if (region->vm_flags & VM_MAPPED_COPY) { 591 if (region->vm_flags & VM_MAPPED_COPY) {
594 kdebug("free series"); 592 kdebug("free series");
595 free_page_series(region->vm_start, region->vm_top); 593 free_page_series(region->vm_start, region->vm_top);
596 } 594 }
597 kmem_cache_free(vm_region_jar, region); 595 kmem_cache_free(vm_region_jar, region);
598 } else { 596 } else {
599 up_write(&nommu_region_sem); 597 up_write(&nommu_region_sem);
600 } 598 }
601 } 599 }
602 600
603 /* 601 /*
604 * release a reference to a region 602 * release a reference to a region
605 */ 603 */
606 static void put_nommu_region(struct vm_region *region) 604 static void put_nommu_region(struct vm_region *region)
607 { 605 {
608 down_write(&nommu_region_sem); 606 down_write(&nommu_region_sem);
609 __put_nommu_region(region); 607 __put_nommu_region(region);
610 } 608 }
611 609
612 /* 610 /*
613 * add a VMA into a process's mm_struct in the appropriate place in the list 611 * add a VMA into a process's mm_struct in the appropriate place in the list
614 * and tree and add to the address space's page tree also if not an anonymous 612 * and tree and add to the address space's page tree also if not an anonymous
615 * page 613 * page
616 * - should be called with mm->mmap_sem held writelocked 614 * - should be called with mm->mmap_sem held writelocked
617 */ 615 */
618 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) 616 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
619 { 617 {
620 struct vm_area_struct *pvma, **pp; 618 struct vm_area_struct *pvma, **pp;
621 struct address_space *mapping; 619 struct address_space *mapping;
622 struct rb_node **p, *parent; 620 struct rb_node **p, *parent;
623 621
624 kenter(",%p", vma); 622 kenter(",%p", vma);
625 623
626 BUG_ON(!vma->vm_region); 624 BUG_ON(!vma->vm_region);
627 625
628 mm->map_count++; 626 mm->map_count++;
629 vma->vm_mm = mm; 627 vma->vm_mm = mm;
630 628
631 /* add the VMA to the mapping */ 629 /* add the VMA to the mapping */
632 if (vma->vm_file) { 630 if (vma->vm_file) {
633 mapping = vma->vm_file->f_mapping; 631 mapping = vma->vm_file->f_mapping;
634 632
635 flush_dcache_mmap_lock(mapping); 633 flush_dcache_mmap_lock(mapping);
636 vma_prio_tree_insert(vma, &mapping->i_mmap); 634 vma_prio_tree_insert(vma, &mapping->i_mmap);
637 flush_dcache_mmap_unlock(mapping); 635 flush_dcache_mmap_unlock(mapping);
638 } 636 }
639 637
640 /* add the VMA to the tree */ 638 /* add the VMA to the tree */
641 parent = NULL; 639 parent = NULL;
642 p = &mm->mm_rb.rb_node; 640 p = &mm->mm_rb.rb_node;
643 while (*p) { 641 while (*p) {
644 parent = *p; 642 parent = *p;
645 pvma = rb_entry(parent, struct vm_area_struct, vm_rb); 643 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
646 644
647 /* sort by: start addr, end addr, VMA struct addr in that order 645 /* sort by: start addr, end addr, VMA struct addr in that order
648 * (the latter is necessary as we may get identical VMAs) */ 646 * (the latter is necessary as we may get identical VMAs) */
649 if (vma->vm_start < pvma->vm_start) 647 if (vma->vm_start < pvma->vm_start)
650 p = &(*p)->rb_left; 648 p = &(*p)->rb_left;
651 else if (vma->vm_start > pvma->vm_start) 649 else if (vma->vm_start > pvma->vm_start)
652 p = &(*p)->rb_right; 650 p = &(*p)->rb_right;
653 else if (vma->vm_end < pvma->vm_end) 651 else if (vma->vm_end < pvma->vm_end)
654 p = &(*p)->rb_left; 652 p = &(*p)->rb_left;
655 else if (vma->vm_end > pvma->vm_end) 653 else if (vma->vm_end > pvma->vm_end)
656 p = &(*p)->rb_right; 654 p = &(*p)->rb_right;
657 else if (vma < pvma) 655 else if (vma < pvma)
658 p = &(*p)->rb_left; 656 p = &(*p)->rb_left;
659 else if (vma > pvma) 657 else if (vma > pvma)
660 p = &(*p)->rb_right; 658 p = &(*p)->rb_right;
661 else 659 else
662 BUG(); 660 BUG();
663 } 661 }
664 662
665 rb_link_node(&vma->vm_rb, parent, p); 663 rb_link_node(&vma->vm_rb, parent, p);
666 rb_insert_color(&vma->vm_rb, &mm->mm_rb); 664 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
667 665
668 /* add VMA to the VMA list also */ 666 /* add VMA to the VMA list also */
669 for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) { 667 for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
670 if (pvma->vm_start > vma->vm_start) 668 if (pvma->vm_start > vma->vm_start)
671 break; 669 break;
672 if (pvma->vm_start < vma->vm_start) 670 if (pvma->vm_start < vma->vm_start)
673 continue; 671 continue;
674 if (pvma->vm_end < vma->vm_end) 672 if (pvma->vm_end < vma->vm_end)
675 break; 673 break;
676 } 674 }
677 675
678 vma->vm_next = *pp; 676 vma->vm_next = *pp;
679 *pp = vma; 677 *pp = vma;
680 } 678 }
681 679
682 /* 680 /*
683 * delete a VMA from its owning mm_struct and address space 681 * delete a VMA from its owning mm_struct and address space
684 */ 682 */
685 static void delete_vma_from_mm(struct vm_area_struct *vma) 683 static void delete_vma_from_mm(struct vm_area_struct *vma)
686 { 684 {
687 struct vm_area_struct **pp; 685 struct vm_area_struct **pp;
688 struct address_space *mapping; 686 struct address_space *mapping;
689 struct mm_struct *mm = vma->vm_mm; 687 struct mm_struct *mm = vma->vm_mm;
690 688
691 kenter("%p", vma); 689 kenter("%p", vma);
692 690
693 mm->map_count--; 691 mm->map_count--;
694 if (mm->mmap_cache == vma) 692 if (mm->mmap_cache == vma)
695 mm->mmap_cache = NULL; 693 mm->mmap_cache = NULL;
696 694
697 /* remove the VMA from the mapping */ 695 /* remove the VMA from the mapping */
698 if (vma->vm_file) { 696 if (vma->vm_file) {
699 mapping = vma->vm_file->f_mapping; 697 mapping = vma->vm_file->f_mapping;
700 698
701 flush_dcache_mmap_lock(mapping); 699 flush_dcache_mmap_lock(mapping);
702 vma_prio_tree_remove(vma, &mapping->i_mmap); 700 vma_prio_tree_remove(vma, &mapping->i_mmap);
703 flush_dcache_mmap_unlock(mapping); 701 flush_dcache_mmap_unlock(mapping);
704 } 702 }
705 703
706 /* remove from the MM's tree and list */ 704 /* remove from the MM's tree and list */
707 rb_erase(&vma->vm_rb, &mm->mm_rb); 705 rb_erase(&vma->vm_rb, &mm->mm_rb);
708 for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) { 706 for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
709 if (*pp == vma) { 707 if (*pp == vma) {
710 *pp = vma->vm_next; 708 *pp = vma->vm_next;
711 break; 709 break;
712 } 710 }
713 } 711 }
714 712
715 vma->vm_mm = NULL; 713 vma->vm_mm = NULL;
716 } 714 }
717 715
718 /* 716 /*
719 * destroy a VMA record 717 * destroy a VMA record
720 */ 718 */
721 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) 719 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
722 { 720 {
723 kenter("%p", vma); 721 kenter("%p", vma);
724 if (vma->vm_ops && vma->vm_ops->close) 722 if (vma->vm_ops && vma->vm_ops->close)
725 vma->vm_ops->close(vma); 723 vma->vm_ops->close(vma);
726 if (vma->vm_file) { 724 if (vma->vm_file) {
727 fput(vma->vm_file); 725 fput(vma->vm_file);
728 if (vma->vm_flags & VM_EXECUTABLE) 726 if (vma->vm_flags & VM_EXECUTABLE)
729 removed_exe_file_vma(mm); 727 removed_exe_file_vma(mm);
730 } 728 }
731 put_nommu_region(vma->vm_region); 729 put_nommu_region(vma->vm_region);
732 kmem_cache_free(vm_area_cachep, vma); 730 kmem_cache_free(vm_area_cachep, vma);
733 } 731 }
734 732
735 /* 733 /*
736 * look up the first VMA in which addr resides, NULL if none 734 * look up the first VMA in which addr resides, NULL if none
737 * - should be called with mm->mmap_sem at least held readlocked 735 * - should be called with mm->mmap_sem at least held readlocked
738 */ 736 */
739 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 737 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
740 { 738 {
741 struct vm_area_struct *vma; 739 struct vm_area_struct *vma;
742 struct rb_node *n = mm->mm_rb.rb_node; 740 struct rb_node *n = mm->mm_rb.rb_node;
743 741
744 /* check the cache first */ 742 /* check the cache first */
745 vma = mm->mmap_cache; 743 vma = mm->mmap_cache;
746 if (vma && vma->vm_start <= addr && vma->vm_end > addr) 744 if (vma && vma->vm_start <= addr && vma->vm_end > addr)
747 return vma; 745 return vma;
748 746
749 /* trawl the tree (there may be multiple mappings in which addr 747 /* trawl the tree (there may be multiple mappings in which addr
750 * resides) */ 748 * resides) */
751 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { 749 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
752 vma = rb_entry(n, struct vm_area_struct, vm_rb); 750 vma = rb_entry(n, struct vm_area_struct, vm_rb);
753 if (vma->vm_start > addr) 751 if (vma->vm_start > addr)
754 return NULL; 752 return NULL;
755 if (vma->vm_end > addr) { 753 if (vma->vm_end > addr) {
756 mm->mmap_cache = vma; 754 mm->mmap_cache = vma;
757 return vma; 755 return vma;
758 } 756 }
759 } 757 }
760 758
761 return NULL; 759 return NULL;
762 } 760 }
763 EXPORT_SYMBOL(find_vma); 761 EXPORT_SYMBOL(find_vma);
764 762
765 /* 763 /*
766 * find a VMA 764 * find a VMA
767 * - we don't extend stack VMAs under NOMMU conditions 765 * - we don't extend stack VMAs under NOMMU conditions
768 */ 766 */
769 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 767 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
770 { 768 {
771 return find_vma(mm, addr); 769 return find_vma(mm, addr);
772 } 770 }
773 771
774 /* 772 /*
775 * expand a stack to a given address 773 * expand a stack to a given address
776 * - not supported under NOMMU conditions 774 * - not supported under NOMMU conditions
777 */ 775 */
778 int expand_stack(struct vm_area_struct *vma, unsigned long address) 776 int expand_stack(struct vm_area_struct *vma, unsigned long address)
779 { 777 {
780 return -ENOMEM; 778 return -ENOMEM;
781 } 779 }
782 780
783 /* 781 /*
784 * look up the first VMA exactly that exactly matches addr 782 * look up the first VMA exactly that exactly matches addr
785 * - should be called with mm->mmap_sem at least held readlocked 783 * - should be called with mm->mmap_sem at least held readlocked
786 */ 784 */
787 static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 785 static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
788 unsigned long addr, 786 unsigned long addr,
789 unsigned long len) 787 unsigned long len)
790 { 788 {
791 struct vm_area_struct *vma; 789 struct vm_area_struct *vma;
792 struct rb_node *n = mm->mm_rb.rb_node; 790 struct rb_node *n = mm->mm_rb.rb_node;
793 unsigned long end = addr + len; 791 unsigned long end = addr + len;
794 792
795 /* check the cache first */ 793 /* check the cache first */
796 vma = mm->mmap_cache; 794 vma = mm->mmap_cache;
797 if (vma && vma->vm_start == addr && vma->vm_end == end) 795 if (vma && vma->vm_start == addr && vma->vm_end == end)
798 return vma; 796 return vma;
799 797
800 /* trawl the tree (there may be multiple mappings in which addr 798 /* trawl the tree (there may be multiple mappings in which addr
801 * resides) */ 799 * resides) */
802 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { 800 for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
803 vma = rb_entry(n, struct vm_area_struct, vm_rb); 801 vma = rb_entry(n, struct vm_area_struct, vm_rb);
804 if (vma->vm_start < addr) 802 if (vma->vm_start < addr)
805 continue; 803 continue;
806 if (vma->vm_start > addr) 804 if (vma->vm_start > addr)
807 return NULL; 805 return NULL;
808 if (vma->vm_end == end) { 806 if (vma->vm_end == end) {
809 mm->mmap_cache = vma; 807 mm->mmap_cache = vma;
810 return vma; 808 return vma;
811 } 809 }
812 } 810 }
813 811
814 return NULL; 812 return NULL;
815 } 813 }
816 814
817 /* 815 /*
818 * determine whether a mapping should be permitted and, if so, what sort of 816 * determine whether a mapping should be permitted and, if so, what sort of
819 * mapping we're capable of supporting 817 * mapping we're capable of supporting
820 */ 818 */
821 static int validate_mmap_request(struct file *file, 819 static int validate_mmap_request(struct file *file,
822 unsigned long addr, 820 unsigned long addr,
823 unsigned long len, 821 unsigned long len,
824 unsigned long prot, 822 unsigned long prot,
825 unsigned long flags, 823 unsigned long flags,
826 unsigned long pgoff, 824 unsigned long pgoff,
827 unsigned long *_capabilities) 825 unsigned long *_capabilities)
828 { 826 {
829 unsigned long capabilities, rlen; 827 unsigned long capabilities, rlen;
830 unsigned long reqprot = prot; 828 unsigned long reqprot = prot;
831 int ret; 829 int ret;
832 830
833 /* do the simple checks first */ 831 /* do the simple checks first */
834 if (flags & MAP_FIXED || addr) { 832 if (flags & MAP_FIXED || addr) {
835 printk(KERN_DEBUG 833 printk(KERN_DEBUG
836 "%d: Can't do fixed-address/overlay mmap of RAM\n", 834 "%d: Can't do fixed-address/overlay mmap of RAM\n",
837 current->pid); 835 current->pid);
838 return -EINVAL; 836 return -EINVAL;
839 } 837 }
840 838
841 if ((flags & MAP_TYPE) != MAP_PRIVATE && 839 if ((flags & MAP_TYPE) != MAP_PRIVATE &&
842 (flags & MAP_TYPE) != MAP_SHARED) 840 (flags & MAP_TYPE) != MAP_SHARED)
843 return -EINVAL; 841 return -EINVAL;
844 842
845 if (!len) 843 if (!len)
846 return -EINVAL; 844 return -EINVAL;
847 845
848 /* Careful about overflows.. */ 846 /* Careful about overflows.. */
849 rlen = PAGE_ALIGN(len); 847 rlen = PAGE_ALIGN(len);
850 if (!rlen || rlen > TASK_SIZE) 848 if (!rlen || rlen > TASK_SIZE)
851 return -ENOMEM; 849 return -ENOMEM;
852 850
853 /* offset overflow? */ 851 /* offset overflow? */
854 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) 852 if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
855 return -EOVERFLOW; 853 return -EOVERFLOW;
856 854
857 if (file) { 855 if (file) {
858 /* validate file mapping requests */ 856 /* validate file mapping requests */
859 struct address_space *mapping; 857 struct address_space *mapping;
860 858
861 /* files must support mmap */ 859 /* files must support mmap */
862 if (!file->f_op || !file->f_op->mmap) 860 if (!file->f_op || !file->f_op->mmap)
863 return -ENODEV; 861 return -ENODEV;
864 862
865 /* work out if what we've got could possibly be shared 863 /* work out if what we've got could possibly be shared
866 * - we support chardevs that provide their own "memory" 864 * - we support chardevs that provide their own "memory"
867 * - we support files/blockdevs that are memory backed 865 * - we support files/blockdevs that are memory backed
868 */ 866 */
869 mapping = file->f_mapping; 867 mapping = file->f_mapping;
870 if (!mapping) 868 if (!mapping)
871 mapping = file->f_path.dentry->d_inode->i_mapping; 869 mapping = file->f_path.dentry->d_inode->i_mapping;
872 870
873 capabilities = 0; 871 capabilities = 0;
874 if (mapping && mapping->backing_dev_info) 872 if (mapping && mapping->backing_dev_info)
875 capabilities = mapping->backing_dev_info->capabilities; 873 capabilities = mapping->backing_dev_info->capabilities;
876 874
877 if (!capabilities) { 875 if (!capabilities) {
878 /* no explicit capabilities set, so assume some 876 /* no explicit capabilities set, so assume some
879 * defaults */ 877 * defaults */
880 switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) { 878 switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
881 case S_IFREG: 879 case S_IFREG:
882 case S_IFBLK: 880 case S_IFBLK:
883 capabilities = BDI_CAP_MAP_COPY; 881 capabilities = BDI_CAP_MAP_COPY;
884 break; 882 break;
885 883
886 case S_IFCHR: 884 case S_IFCHR:
887 capabilities = 885 capabilities =
888 BDI_CAP_MAP_DIRECT | 886 BDI_CAP_MAP_DIRECT |
889 BDI_CAP_READ_MAP | 887 BDI_CAP_READ_MAP |
890 BDI_CAP_WRITE_MAP; 888 BDI_CAP_WRITE_MAP;
891 break; 889 break;
892 890
893 default: 891 default:
894 return -EINVAL; 892 return -EINVAL;
895 } 893 }
896 } 894 }
897 895
898 /* eliminate any capabilities that we can't support on this 896 /* eliminate any capabilities that we can't support on this
899 * device */ 897 * device */
900 if (!file->f_op->get_unmapped_area) 898 if (!file->f_op->get_unmapped_area)
901 capabilities &= ~BDI_CAP_MAP_DIRECT; 899 capabilities &= ~BDI_CAP_MAP_DIRECT;
902 if (!file->f_op->read) 900 if (!file->f_op->read)
903 capabilities &= ~BDI_CAP_MAP_COPY; 901 capabilities &= ~BDI_CAP_MAP_COPY;
904 902
905 if (flags & MAP_SHARED) { 903 if (flags & MAP_SHARED) {
906 /* do checks for writing, appending and locking */ 904 /* do checks for writing, appending and locking */
907 if ((prot & PROT_WRITE) && 905 if ((prot & PROT_WRITE) &&
908 !(file->f_mode & FMODE_WRITE)) 906 !(file->f_mode & FMODE_WRITE))
909 return -EACCES; 907 return -EACCES;
910 908
911 if (IS_APPEND(file->f_path.dentry->d_inode) && 909 if (IS_APPEND(file->f_path.dentry->d_inode) &&
912 (file->f_mode & FMODE_WRITE)) 910 (file->f_mode & FMODE_WRITE))
913 return -EACCES; 911 return -EACCES;
914 912
915 if (locks_verify_locked(file->f_path.dentry->d_inode)) 913 if (locks_verify_locked(file->f_path.dentry->d_inode))
916 return -EAGAIN; 914 return -EAGAIN;
917 915
918 if (!(capabilities & BDI_CAP_MAP_DIRECT)) 916 if (!(capabilities & BDI_CAP_MAP_DIRECT))
919 return -ENODEV; 917 return -ENODEV;
920 918
921 if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) || 919 if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) ||
922 ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) || 920 ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
923 ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP)) 921 ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP))
924 ) { 922 ) {
925 printk("MAP_SHARED not completely supported on !MMU\n"); 923 printk("MAP_SHARED not completely supported on !MMU\n");
926 return -EINVAL; 924 return -EINVAL;
927 } 925 }
928 926
929 /* we mustn't privatise shared mappings */ 927 /* we mustn't privatise shared mappings */
930 capabilities &= ~BDI_CAP_MAP_COPY; 928 capabilities &= ~BDI_CAP_MAP_COPY;
931 } 929 }
932 else { 930 else {
933 /* we're going to read the file into private memory we 931 /* we're going to read the file into private memory we
934 * allocate */ 932 * allocate */
935 if (!(capabilities & BDI_CAP_MAP_COPY)) 933 if (!(capabilities & BDI_CAP_MAP_COPY))
936 return -ENODEV; 934 return -ENODEV;
937 935
938 /* we don't permit a private writable mapping to be 936 /* we don't permit a private writable mapping to be
939 * shared with the backing device */ 937 * shared with the backing device */
940 if (prot & PROT_WRITE) 938 if (prot & PROT_WRITE)
941 capabilities &= ~BDI_CAP_MAP_DIRECT; 939 capabilities &= ~BDI_CAP_MAP_DIRECT;
942 } 940 }
943 941
944 /* handle executable mappings and implied executable 942 /* handle executable mappings and implied executable
945 * mappings */ 943 * mappings */
946 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { 944 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
947 if (prot & PROT_EXEC) 945 if (prot & PROT_EXEC)
948 return -EPERM; 946 return -EPERM;
949 } 947 }
950 else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { 948 else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
951 /* handle implication of PROT_EXEC by PROT_READ */ 949 /* handle implication of PROT_EXEC by PROT_READ */
952 if (current->personality & READ_IMPLIES_EXEC) { 950 if (current->personality & READ_IMPLIES_EXEC) {
953 if (capabilities & BDI_CAP_EXEC_MAP) 951 if (capabilities & BDI_CAP_EXEC_MAP)
954 prot |= PROT_EXEC; 952 prot |= PROT_EXEC;
955 } 953 }
956 } 954 }
957 else if ((prot & PROT_READ) && 955 else if ((prot & PROT_READ) &&
958 (prot & PROT_EXEC) && 956 (prot & PROT_EXEC) &&
959 !(capabilities & BDI_CAP_EXEC_MAP) 957 !(capabilities & BDI_CAP_EXEC_MAP)
960 ) { 958 ) {
961 /* backing file is not executable, try to copy */ 959 /* backing file is not executable, try to copy */
962 capabilities &= ~BDI_CAP_MAP_DIRECT; 960 capabilities &= ~BDI_CAP_MAP_DIRECT;
963 } 961 }
964 } 962 }
965 else { 963 else {
966 /* anonymous mappings are always memory backed and can be 964 /* anonymous mappings are always memory backed and can be
967 * privately mapped 965 * privately mapped
968 */ 966 */
969 capabilities = BDI_CAP_MAP_COPY; 967 capabilities = BDI_CAP_MAP_COPY;
970 968
971 /* handle PROT_EXEC implication by PROT_READ */ 969 /* handle PROT_EXEC implication by PROT_READ */
972 if ((prot & PROT_READ) && 970 if ((prot & PROT_READ) &&
973 (current->personality & READ_IMPLIES_EXEC)) 971 (current->personality & READ_IMPLIES_EXEC))
974 prot |= PROT_EXEC; 972 prot |= PROT_EXEC;
975 } 973 }
976 974
977 /* allow the security API to have its say */ 975 /* allow the security API to have its say */
978 ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); 976 ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
979 if (ret < 0) 977 if (ret < 0)
980 return ret; 978 return ret;
981 979
982 /* looks okay */ 980 /* looks okay */
983 *_capabilities = capabilities; 981 *_capabilities = capabilities;
984 return 0; 982 return 0;
985 } 983 }
986 984
987 /* 985 /*
988 * we've determined that we can make the mapping, now translate what we 986 * we've determined that we can make the mapping, now translate what we
989 * now know into VMA flags 987 * now know into VMA flags
990 */ 988 */
991 static unsigned long determine_vm_flags(struct file *file, 989 static unsigned long determine_vm_flags(struct file *file,
992 unsigned long prot, 990 unsigned long prot,
993 unsigned long flags, 991 unsigned long flags,
994 unsigned long capabilities) 992 unsigned long capabilities)
995 { 993 {
996 unsigned long vm_flags; 994 unsigned long vm_flags;
997 995
998 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); 996 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
999 vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 997 vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1000 /* vm_flags |= mm->def_flags; */ 998 /* vm_flags |= mm->def_flags; */
1001 999
1002 if (!(capabilities & BDI_CAP_MAP_DIRECT)) { 1000 if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
1003 /* attempt to share read-only copies of mapped file chunks */ 1001 /* attempt to share read-only copies of mapped file chunks */
1004 if (file && !(prot & PROT_WRITE)) 1002 if (file && !(prot & PROT_WRITE))
1005 vm_flags |= VM_MAYSHARE; 1003 vm_flags |= VM_MAYSHARE;
1006 } 1004 }
1007 else { 1005 else {
1008 /* overlay a shareable mapping on the backing device or inode 1006 /* overlay a shareable mapping on the backing device or inode
1009 * if possible - used for chardevs, ramfs/tmpfs/shmfs and 1007 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
1010 * romfs/cramfs */ 1008 * romfs/cramfs */
1011 if (flags & MAP_SHARED) 1009 if (flags & MAP_SHARED)
1012 vm_flags |= VM_MAYSHARE | VM_SHARED; 1010 vm_flags |= VM_MAYSHARE | VM_SHARED;
1013 else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0) 1011 else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
1014 vm_flags |= VM_MAYSHARE; 1012 vm_flags |= VM_MAYSHARE;
1015 } 1013 }
1016 1014
1017 /* refuse to let anyone share private mappings with this process if 1015 /* refuse to let anyone share private mappings with this process if
1018 * it's being traced - otherwise breakpoints set in it may interfere 1016 * it's being traced - otherwise breakpoints set in it may interfere
1019 * with another untraced process 1017 * with another untraced process
1020 */ 1018 */
1021 if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) 1019 if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current))
1022 vm_flags &= ~VM_MAYSHARE; 1020 vm_flags &= ~VM_MAYSHARE;
1023 1021
1024 return vm_flags; 1022 return vm_flags;
1025 } 1023 }
1026 1024
1027 /* 1025 /*
1028 * set up a shared mapping on a file (the driver or filesystem provides and 1026 * set up a shared mapping on a file (the driver or filesystem provides and
1029 * pins the storage) 1027 * pins the storage)
1030 */ 1028 */
1031 static int do_mmap_shared_file(struct vm_area_struct *vma) 1029 static int do_mmap_shared_file(struct vm_area_struct *vma)
1032 { 1030 {
1033 int ret; 1031 int ret;
1034 1032
1035 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1033 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
1036 if (ret == 0) { 1034 if (ret == 0) {
1037 vma->vm_region->vm_top = vma->vm_region->vm_end; 1035 vma->vm_region->vm_top = vma->vm_region->vm_end;
1038 return ret; 1036 return ret;
1039 } 1037 }
1040 if (ret != -ENOSYS) 1038 if (ret != -ENOSYS)
1041 return ret; 1039 return ret;
1042 1040
1043 /* getting an ENOSYS error indicates that direct mmap isn't 1041 /* getting an ENOSYS error indicates that direct mmap isn't
1044 * possible (as opposed to tried but failed) so we'll fall 1042 * possible (as opposed to tried but failed) so we'll fall
1045 * through to making a private copy of the data and mapping 1043 * through to making a private copy of the data and mapping
1046 * that if we can */ 1044 * that if we can */
1047 return -ENODEV; 1045 return -ENODEV;
1048 } 1046 }
1049 1047
1050 /* 1048 /*
1051 * set up a private mapping or an anonymous shared mapping 1049 * set up a private mapping or an anonymous shared mapping
1052 */ 1050 */
1053 static int do_mmap_private(struct vm_area_struct *vma, 1051 static int do_mmap_private(struct vm_area_struct *vma,
1054 struct vm_region *region, 1052 struct vm_region *region,
1055 unsigned long len) 1053 unsigned long len)
1056 { 1054 {
1057 struct page *pages; 1055 struct page *pages;
1058 unsigned long total, point, n, rlen; 1056 unsigned long total, point, n, rlen;
1059 void *base; 1057 void *base;
1060 int ret, order; 1058 int ret, order;
1061 1059
1062 /* invoke the file's mapping function so that it can keep track of 1060 /* invoke the file's mapping function so that it can keep track of
1063 * shared mappings on devices or memory 1061 * shared mappings on devices or memory
1064 * - VM_MAYSHARE will be set if it may attempt to share 1062 * - VM_MAYSHARE will be set if it may attempt to share
1065 */ 1063 */
1066 if (vma->vm_file) { 1064 if (vma->vm_file) {
1067 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 1065 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
1068 if (ret == 0) { 1066 if (ret == 0) {
1069 /* shouldn't return success if we're not sharing */ 1067 /* shouldn't return success if we're not sharing */
1070 BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); 1068 BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
1071 vma->vm_region->vm_top = vma->vm_region->vm_end; 1069 vma->vm_region->vm_top = vma->vm_region->vm_end;
1072 return ret; 1070 return ret;
1073 } 1071 }
1074 if (ret != -ENOSYS) 1072 if (ret != -ENOSYS)
1075 return ret; 1073 return ret;
1076 1074
1077 /* getting an ENOSYS error indicates that direct mmap isn't 1075 /* getting an ENOSYS error indicates that direct mmap isn't
1078 * possible (as opposed to tried but failed) so we'll try to 1076 * possible (as opposed to tried but failed) so we'll try to
1079 * make a private copy of the data and map that instead */ 1077 * make a private copy of the data and map that instead */
1080 } 1078 }
1081 1079
1082 rlen = PAGE_ALIGN(len); 1080 rlen = PAGE_ALIGN(len);
1083 1081
1084 /* allocate some memory to hold the mapping 1082 /* allocate some memory to hold the mapping
1085 * - note that this may not return a page-aligned address if the object 1083 * - note that this may not return a page-aligned address if the object
1086 * we're allocating is smaller than a page 1084 * we're allocating is smaller than a page
1087 */ 1085 */
1088 order = get_order(rlen); 1086 order = get_order(rlen);
1089 kdebug("alloc order %d for %lx", order, len); 1087 kdebug("alloc order %d for %lx", order, len);
1090 1088
1091 pages = alloc_pages(GFP_KERNEL, order); 1089 pages = alloc_pages(GFP_KERNEL, order);
1092 if (!pages) 1090 if (!pages)
1093 goto enomem; 1091 goto enomem;
1094 1092
1095 total = 1 << order; 1093 total = 1 << order;
1096 atomic_long_add(total, &mmap_pages_allocated); 1094 atomic_long_add(total, &mmap_pages_allocated);
1097 1095
1098 point = rlen >> PAGE_SHIFT; 1096 point = rlen >> PAGE_SHIFT;
1099 1097
1100 /* we allocated a power-of-2 sized page set, so we may want to trim off 1098 /* we allocated a power-of-2 sized page set, so we may want to trim off
1101 * the excess */ 1099 * the excess */
1102 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) { 1100 if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
1103 while (total > point) { 1101 while (total > point) {
1104 order = ilog2(total - point); 1102 order = ilog2(total - point);
1105 n = 1 << order; 1103 n = 1 << order;
1106 kdebug("shave %lu/%lu @%lu", n, total - point, total); 1104 kdebug("shave %lu/%lu @%lu", n, total - point, total);
1107 atomic_long_sub(n, &mmap_pages_allocated); 1105 atomic_long_sub(n, &mmap_pages_allocated);
1108 total -= n; 1106 total -= n;
1109 set_page_refcounted(pages + total); 1107 set_page_refcounted(pages + total);
1110 __free_pages(pages + total, order); 1108 __free_pages(pages + total, order);
1111 } 1109 }
1112 } 1110 }
1113 1111
1114 for (point = 1; point < total; point++) 1112 for (point = 1; point < total; point++)
1115 set_page_refcounted(&pages[point]); 1113 set_page_refcounted(&pages[point]);
1116 1114
1117 base = page_address(pages); 1115 base = page_address(pages);
1118 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; 1116 region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
1119 region->vm_start = (unsigned long) base; 1117 region->vm_start = (unsigned long) base;
1120 region->vm_end = region->vm_start + rlen; 1118 region->vm_end = region->vm_start + rlen;
1121 region->vm_top = region->vm_start + (total << PAGE_SHIFT); 1119 region->vm_top = region->vm_start + (total << PAGE_SHIFT);
1122 1120
1123 vma->vm_start = region->vm_start; 1121 vma->vm_start = region->vm_start;
1124 vma->vm_end = region->vm_start + len; 1122 vma->vm_end = region->vm_start + len;
1125 1123
1126 if (vma->vm_file) { 1124 if (vma->vm_file) {
1127 /* read the contents of a file into the copy */ 1125 /* read the contents of a file into the copy */
1128 mm_segment_t old_fs; 1126 mm_segment_t old_fs;
1129 loff_t fpos; 1127 loff_t fpos;
1130 1128
1131 fpos = vma->vm_pgoff; 1129 fpos = vma->vm_pgoff;
1132 fpos <<= PAGE_SHIFT; 1130 fpos <<= PAGE_SHIFT;
1133 1131
1134 old_fs = get_fs(); 1132 old_fs = get_fs();
1135 set_fs(KERNEL_DS); 1133 set_fs(KERNEL_DS);
1136 ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos); 1134 ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
1137 set_fs(old_fs); 1135 set_fs(old_fs);
1138 1136
1139 if (ret < 0) 1137 if (ret < 0)
1140 goto error_free; 1138 goto error_free;
1141 1139
1142 /* clear the last little bit */ 1140 /* clear the last little bit */
1143 if (ret < rlen) 1141 if (ret < rlen)
1144 memset(base + ret, 0, rlen - ret); 1142 memset(base + ret, 0, rlen - ret);
1145 1143
1146 } else { 1144 } else {
1147 /* if it's an anonymous mapping, then just clear it */ 1145 /* if it's an anonymous mapping, then just clear it */
1148 memset(base, 0, rlen); 1146 memset(base, 0, rlen);
1149 } 1147 }
1150 1148
1151 return 0; 1149 return 0;
1152 1150
1153 error_free: 1151 error_free:
1154 free_page_series(region->vm_start, region->vm_end); 1152 free_page_series(region->vm_start, region->vm_end);
1155 region->vm_start = vma->vm_start = 0; 1153 region->vm_start = vma->vm_start = 0;
1156 region->vm_end = vma->vm_end = 0; 1154 region->vm_end = vma->vm_end = 0;
1157 region->vm_top = 0; 1155 region->vm_top = 0;
1158 return ret; 1156 return ret;
1159 1157
1160 enomem: 1158 enomem:
1161 printk("Allocation of length %lu from process %d (%s) failed\n", 1159 printk("Allocation of length %lu from process %d (%s) failed\n",
1162 len, current->pid, current->comm); 1160 len, current->pid, current->comm);
1163 show_free_areas(); 1161 show_free_areas();
1164 return -ENOMEM; 1162 return -ENOMEM;
1165 } 1163 }
1166 1164
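do_mmap_private() rounds the request up to a whole number of pages, allocates a power-of-two block with alloc_pages(), and (subject to sysctl_nr_trim_pages) gives back the unused tail. A standalone sketch of that size arithmetic, assuming 4 KiB pages and re-deriving get_order() locally rather than using the kernel helper:

/* simplified model of the rlen/order/trim arithmetic in do_mmap_private();
 * assumes 4 KiB pages and re-implements get_order() locally */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

static unsigned int order_for(unsigned long rlen)
{
	unsigned long pages = rlen >> PAGE_SHIFT;
	unsigned int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	unsigned long len = 12288;			/* three pages requested */
	unsigned long rlen = PAGE_ALIGN(len);
	unsigned int order = order_for(rlen);
	unsigned long total = 1UL << order;		/* pages actually allocated */
	unsigned long point = rlen >> PAGE_SHIFT;	/* pages actually needed */

	printf("rlen=%lu order=%u total=%lu trimmed=%lu\n",
	       rlen, order, total, total - point);	/* one excess page to shave */
	return 0;
}
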
1167 /* 1165 /*
1168 * handle mapping creation for uClinux 1166 * handle mapping creation for uClinux
1169 */ 1167 */
1170 unsigned long do_mmap_pgoff(struct file *file, 1168 unsigned long do_mmap_pgoff(struct file *file,
1171 unsigned long addr, 1169 unsigned long addr,
1172 unsigned long len, 1170 unsigned long len,
1173 unsigned long prot, 1171 unsigned long prot,
1174 unsigned long flags, 1172 unsigned long flags,
1175 unsigned long pgoff) 1173 unsigned long pgoff)
1176 { 1174 {
1177 struct vm_area_struct *vma; 1175 struct vm_area_struct *vma;
1178 struct vm_region *region; 1176 struct vm_region *region;
1179 struct rb_node *rb; 1177 struct rb_node *rb;
1180 unsigned long capabilities, vm_flags, result; 1178 unsigned long capabilities, vm_flags, result;
1181 int ret; 1179 int ret;
1182 1180
1183 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); 1181 kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
1184 1182
1185 if (!(flags & MAP_FIXED)) 1183 if (!(flags & MAP_FIXED))
1186 addr = round_hint_to_min(addr); 1184 addr = round_hint_to_min(addr);
1187 1185
1188 /* decide whether we should attempt the mapping, and if so what sort of 1186 /* decide whether we should attempt the mapping, and if so what sort of
1189 * mapping */ 1187 * mapping */
1190 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 1188 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
1191 &capabilities); 1189 &capabilities);
1192 if (ret < 0) { 1190 if (ret < 0) {
1193 kleave(" = %d [val]", ret); 1191 kleave(" = %d [val]", ret);
1194 return ret; 1192 return ret;
1195 } 1193 }
1196 1194
1197 /* we've determined that we can make the mapping, now translate what we 1195 /* we've determined that we can make the mapping, now translate what we
1198 * now know into VMA flags */ 1196 * now know into VMA flags */
1199 vm_flags = determine_vm_flags(file, prot, flags, capabilities); 1197 vm_flags = determine_vm_flags(file, prot, flags, capabilities);
1200 1198
1201 /* we're going to need to record the mapping */ 1199 /* we're going to need to record the mapping */
1202 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); 1200 region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
1203 if (!region) 1201 if (!region)
1204 goto error_getting_region; 1202 goto error_getting_region;
1205 1203
1206 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 1204 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1207 if (!vma) 1205 if (!vma)
1208 goto error_getting_vma; 1206 goto error_getting_vma;
1209 1207
1210 atomic_set(&region->vm_usage, 1); 1208 atomic_set(&region->vm_usage, 1);
1211 region->vm_flags = vm_flags; 1209 region->vm_flags = vm_flags;
1212 region->vm_pgoff = pgoff; 1210 region->vm_pgoff = pgoff;
1213 1211
1214 INIT_LIST_HEAD(&vma->anon_vma_node); 1212 INIT_LIST_HEAD(&vma->anon_vma_node);
1215 vma->vm_flags = vm_flags; 1213 vma->vm_flags = vm_flags;
1216 vma->vm_pgoff = pgoff; 1214 vma->vm_pgoff = pgoff;
1217 1215
1218 if (file) { 1216 if (file) {
1219 region->vm_file = file; 1217 region->vm_file = file;
1220 get_file(file); 1218 get_file(file);
1221 vma->vm_file = file; 1219 vma->vm_file = file;
1222 get_file(file); 1220 get_file(file);
1223 if (vm_flags & VM_EXECUTABLE) { 1221 if (vm_flags & VM_EXECUTABLE) {
1224 added_exe_file_vma(current->mm); 1222 added_exe_file_vma(current->mm);
1225 vma->vm_mm = current->mm; 1223 vma->vm_mm = current->mm;
1226 } 1224 }
1227 } 1225 }
1228 1226
1229 down_write(&nommu_region_sem); 1227 down_write(&nommu_region_sem);
1230 1228
1231 /* if we want to share, we need to check for regions created by other 1229 /* if we want to share, we need to check for regions created by other
1232 * mmap() calls that overlap with our proposed mapping 1230 * mmap() calls that overlap with our proposed mapping
1233 * - we can only share with a superset match on most regular files 1231 * - we can only share with a superset match on most regular files
1234 * - shared mappings on character devices and memory backed files are 1232 * - shared mappings on character devices and memory backed files are
1235 * permitted to overlap inexactly as far as we are concerned for in 1233 * permitted to overlap inexactly as far as we are concerned for in
1236 * these cases, sharing is handled in the driver or filesystem rather 1234 * these cases, sharing is handled in the driver or filesystem rather
1237 * than here 1235 * than here
1238 */ 1236 */
1239 if (vm_flags & VM_MAYSHARE) { 1237 if (vm_flags & VM_MAYSHARE) {
1240 struct vm_region *pregion; 1238 struct vm_region *pregion;
1241 unsigned long pglen, rpglen, pgend, rpgend, start; 1239 unsigned long pglen, rpglen, pgend, rpgend, start;
1242 1240
1243 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1241 pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1244 pgend = pgoff + pglen; 1242 pgend = pgoff + pglen;
1245 1243
1246 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { 1244 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
1247 pregion = rb_entry(rb, struct vm_region, vm_rb); 1245 pregion = rb_entry(rb, struct vm_region, vm_rb);
1248 1246
1249 if (!(pregion->vm_flags & VM_MAYSHARE)) 1247 if (!(pregion->vm_flags & VM_MAYSHARE))
1250 continue; 1248 continue;
1251 1249
1252 /* search for overlapping mappings on the same file */ 1250 /* search for overlapping mappings on the same file */
1253 if (pregion->vm_file->f_path.dentry->d_inode != 1251 if (pregion->vm_file->f_path.dentry->d_inode !=
1254 file->f_path.dentry->d_inode) 1252 file->f_path.dentry->d_inode)
1255 continue; 1253 continue;
1256 1254
1257 if (pregion->vm_pgoff >= pgend) 1255 if (pregion->vm_pgoff >= pgend)
1258 continue; 1256 continue;
1259 1257
1260 rpglen = pregion->vm_end - pregion->vm_start; 1258 rpglen = pregion->vm_end - pregion->vm_start;
1261 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; 1259 rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
1262 rpgend = pregion->vm_pgoff + rpglen; 1260 rpgend = pregion->vm_pgoff + rpglen;
1263 if (pgoff >= rpgend) 1261 if (pgoff >= rpgend)
1264 continue; 1262 continue;
1265 1263
1266 /* handle inexactly overlapping matches between 1264 /* handle inexactly overlapping matches between
1267 * mappings */ 1265 * mappings */
1268 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && 1266 if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
1269 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { 1267 !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
1270 /* new mapping is not a subset of the region */ 1268 /* new mapping is not a subset of the region */
1271 if (!(capabilities & BDI_CAP_MAP_DIRECT)) 1269 if (!(capabilities & BDI_CAP_MAP_DIRECT))
1272 goto sharing_violation; 1270 goto sharing_violation;
1273 continue; 1271 continue;
1274 } 1272 }
1275 1273
1276 /* we've found a region we can share */ 1274 /* we've found a region we can share */
1277 atomic_inc(&pregion->vm_usage); 1275 atomic_inc(&pregion->vm_usage);
1278 vma->vm_region = pregion; 1276 vma->vm_region = pregion;
1279 start = pregion->vm_start; 1277 start = pregion->vm_start;
1280 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 1278 start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
1281 vma->vm_start = start; 1279 vma->vm_start = start;
1282 vma->vm_end = start + len; 1280 vma->vm_end = start + len;
1283 1281
1284 if (pregion->vm_flags & VM_MAPPED_COPY) { 1282 if (pregion->vm_flags & VM_MAPPED_COPY) {
1285 kdebug("share copy"); 1283 kdebug("share copy");
1286 vma->vm_flags |= VM_MAPPED_COPY; 1284 vma->vm_flags |= VM_MAPPED_COPY;
1287 } else { 1285 } else {
1288 kdebug("share mmap"); 1286 kdebug("share mmap");
1289 ret = do_mmap_shared_file(vma); 1287 ret = do_mmap_shared_file(vma);
1290 if (ret < 0) { 1288 if (ret < 0) {
1291 vma->vm_region = NULL; 1289 vma->vm_region = NULL;
1292 vma->vm_start = 0; 1290 vma->vm_start = 0;
1293 vma->vm_end = 0; 1291 vma->vm_end = 0;
1294 atomic_dec(&pregion->vm_usage); 1292 atomic_dec(&pregion->vm_usage);
1295 pregion = NULL; 1293 pregion = NULL;
1296 goto error_just_free; 1294 goto error_just_free;
1297 } 1295 }
1298 } 1296 }
1299 fput(region->vm_file); 1297 fput(region->vm_file);
1300 kmem_cache_free(vm_region_jar, region); 1298 kmem_cache_free(vm_region_jar, region);
1301 region = pregion; 1299 region = pregion;
1302 result = start; 1300 result = start;
1303 goto share; 1301 goto share;
1304 } 1302 }
1305 1303
1306 /* obtain the address at which to make a shared mapping 1304 /* obtain the address at which to make a shared mapping
1307 * - this is the hook for quasi-memory character devices to 1305 * - this is the hook for quasi-memory character devices to
1308 * tell us the location of a shared mapping 1306 * tell us the location of a shared mapping
1309 */ 1307 */
1310 if (file && file->f_op->get_unmapped_area) { 1308 if (file && file->f_op->get_unmapped_area) {
1311 addr = file->f_op->get_unmapped_area(file, addr, len, 1309 addr = file->f_op->get_unmapped_area(file, addr, len,
1312 pgoff, flags); 1310 pgoff, flags);
1313 if (IS_ERR((void *) addr)) { 1311 if (IS_ERR((void *) addr)) {
1314 ret = addr; 1312 ret = addr;
1315 if (ret != (unsigned long) -ENOSYS) 1313 if (ret != (unsigned long) -ENOSYS)
1316 goto error_just_free; 1314 goto error_just_free;
1317 1315
1318 /* the driver refused to tell us where to site 1316 /* the driver refused to tell us where to site
1319 * the mapping so we'll have to attempt to copy 1317 * the mapping so we'll have to attempt to copy
1320 * it */ 1318 * it */
1321 ret = (unsigned long) -ENODEV; 1319 ret = (unsigned long) -ENODEV;
1322 if (!(capabilities & BDI_CAP_MAP_COPY)) 1320 if (!(capabilities & BDI_CAP_MAP_COPY))
1323 goto error_just_free; 1321 goto error_just_free;
1324 1322
1325 capabilities &= ~BDI_CAP_MAP_DIRECT; 1323 capabilities &= ~BDI_CAP_MAP_DIRECT;
1326 } else { 1324 } else {
1327 vma->vm_start = region->vm_start = addr; 1325 vma->vm_start = region->vm_start = addr;
1328 vma->vm_end = region->vm_end = addr + len; 1326 vma->vm_end = region->vm_end = addr + len;
1329 } 1327 }
1330 } 1328 }
1331 } 1329 }
1332 1330
1333 vma->vm_region = region; 1331 vma->vm_region = region;
1334 1332
1335 /* set up the mapping */ 1333 /* set up the mapping */
1336 if (file && vma->vm_flags & VM_SHARED) 1334 if (file && vma->vm_flags & VM_SHARED)
1337 ret = do_mmap_shared_file(vma); 1335 ret = do_mmap_shared_file(vma);
1338 else 1336 else
1339 ret = do_mmap_private(vma, region, len); 1337 ret = do_mmap_private(vma, region, len);
1340 if (ret < 0) 1338 if (ret < 0)
1341 goto error_put_region; 1339 goto error_put_region;
1342 1340
1343 add_nommu_region(region); 1341 add_nommu_region(region);
1344 1342
1345 /* okay... we have a mapping; now we have to register it */ 1343 /* okay... we have a mapping; now we have to register it */
1346 result = vma->vm_start; 1344 result = vma->vm_start;
1347 1345
1348 current->mm->total_vm += len >> PAGE_SHIFT; 1346 current->mm->total_vm += len >> PAGE_SHIFT;
1349 1347
1350 share: 1348 share:
1351 add_vma_to_mm(current->mm, vma); 1349 add_vma_to_mm(current->mm, vma);
1352 1350
1353 up_write(&nommu_region_sem); 1351 up_write(&nommu_region_sem);
1354 1352
1355 if (prot & PROT_EXEC) 1353 if (prot & PROT_EXEC)
1356 flush_icache_range(result, result + len); 1354 flush_icache_range(result, result + len);
1357 1355
1358 kleave(" = %lx", result); 1356 kleave(" = %lx", result);
1359 return result; 1357 return result;
1360 1358
1361 error_put_region: 1359 error_put_region:
1362 __put_nommu_region(region); 1360 __put_nommu_region(region);
1363 if (vma) { 1361 if (vma) {
1364 if (vma->vm_file) { 1362 if (vma->vm_file) {
1365 fput(vma->vm_file); 1363 fput(vma->vm_file);
1366 if (vma->vm_flags & VM_EXECUTABLE) 1364 if (vma->vm_flags & VM_EXECUTABLE)
1367 removed_exe_file_vma(vma->vm_mm); 1365 removed_exe_file_vma(vma->vm_mm);
1368 } 1366 }
1369 kmem_cache_free(vm_area_cachep, vma); 1367 kmem_cache_free(vm_area_cachep, vma);
1370 } 1368 }
1371 kleave(" = %d [pr]", ret); 1369 kleave(" = %d [pr]", ret);
1372 return ret; 1370 return ret;
1373 1371
1374 error_just_free: 1372 error_just_free:
1375 up_write(&nommu_region_sem); 1373 up_write(&nommu_region_sem);
1376 error: 1374 error:
1377 fput(region->vm_file); 1375 fput(region->vm_file);
1378 kmem_cache_free(vm_region_jar, region); 1376 kmem_cache_free(vm_region_jar, region);
1379 fput(vma->vm_file); 1377 fput(vma->vm_file);
1380 if (vma->vm_flags & VM_EXECUTABLE) 1378 if (vma->vm_flags & VM_EXECUTABLE)
1381 removed_exe_file_vma(vma->vm_mm); 1379 removed_exe_file_vma(vma->vm_mm);
1382 kmem_cache_free(vm_area_cachep, vma); 1380 kmem_cache_free(vm_area_cachep, vma);
1383 kleave(" = %d", ret); 1381 kleave(" = %d", ret);
1384 return ret; 1382 return ret;
1385 1383
1386 sharing_violation: 1384 sharing_violation:
1387 up_write(&nommu_region_sem); 1385 up_write(&nommu_region_sem);
1388 printk(KERN_WARNING "Attempt to share mismatched mappings\n"); 1386 printk(KERN_WARNING "Attempt to share mismatched mappings\n");
1389 ret = -EINVAL; 1387 ret = -EINVAL;
1390 goto error; 1388 goto error;
1391 1389
1392 error_getting_vma: 1390 error_getting_vma:
1393 kmem_cache_free(vm_region_jar, region); 1391 kmem_cache_free(vm_region_jar, region);
1394 printk(KERN_WARNING "Allocation of vma for %lu byte allocation" 1392 printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
1395 " from process %d failed\n", 1393 " from process %d failed\n",
1396 len, current->pid); 1394 len, current->pid);
1397 show_free_areas(); 1395 show_free_areas();
1398 return -ENOMEM; 1396 return -ENOMEM;
1399 1397
1400 error_getting_region: 1398 error_getting_region:
1401 printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" 1399 printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
1402 " from process %d failed\n", 1400 " from process %d failed\n",
1403 len, current->pid); 1401 len, current->pid);
1404 show_free_areas(); 1402 show_free_areas();
1405 return -ENOMEM; 1403 return -ENOMEM;
1406 } 1404 }
1407 EXPORT_SYMBOL(do_mmap_pgoff); 1405 EXPORT_SYMBOL(do_mmap_pgoff);
1408 1406
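do_mmap_pgoff() is the routine mmap(2) reaches on these kernels. For orientation only, a minimal userspace caller for the anonymous MAP_PRIVATE case, which takes the do_mmap_private() path above and receives a zero-filled, page-aligned copy; nothing here is specific to the patch:

/* illustrative mmap(2) caller; an anonymous MAP_PRIVATE request takes the
 * do_mmap_private() path and returns zero-filled memory */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 4096;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(p, 'x', len);		/* private copy, nothing to write back */
	printf("anonymous mapping at %p\n", (void *) p);
	munmap(p, len);
	return 0;
}
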
1409 /* 1407 /*
1410 * split a vma into two pieces at address 'addr', a new vma is allocated either 1408 * split a vma into two pieces at address 'addr', a new vma is allocated either
1411 * for the first part or the tail. 1409 * for the first part or the tail.
1412 */ 1410 */
1413 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 1411 int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
1414 unsigned long addr, int new_below) 1412 unsigned long addr, int new_below)
1415 { 1413 {
1416 struct vm_area_struct *new; 1414 struct vm_area_struct *new;
1417 struct vm_region *region; 1415 struct vm_region *region;
1418 unsigned long npages; 1416 unsigned long npages;
1419 1417
1420 kenter(""); 1418 kenter("");
1421 1419
1422 /* we're only permitted to split anonymous regions that have a single 1420 /* we're only permitted to split anonymous regions that have a single
1423 * owner */ 1421 * owner */
1424 if (vma->vm_file || 1422 if (vma->vm_file ||
1425 atomic_read(&vma->vm_region->vm_usage) != 1) 1423 atomic_read(&vma->vm_region->vm_usage) != 1)
1426 return -ENOMEM; 1424 return -ENOMEM;
1427 1425
1428 if (mm->map_count >= sysctl_max_map_count) 1426 if (mm->map_count >= sysctl_max_map_count)
1429 return -ENOMEM; 1427 return -ENOMEM;
1430 1428
1431 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 1429 region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
1432 if (!region) 1430 if (!region)
1433 return -ENOMEM; 1431 return -ENOMEM;
1434 1432
1435 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 1433 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1436 if (!new) { 1434 if (!new) {
1437 kmem_cache_free(vm_region_jar, region); 1435 kmem_cache_free(vm_region_jar, region);
1438 return -ENOMEM; 1436 return -ENOMEM;
1439 } 1437 }
1440 1438
1441 /* most fields are the same, copy all, and then fixup */ 1439 /* most fields are the same, copy all, and then fixup */
1442 *new = *vma; 1440 *new = *vma;
1443 *region = *vma->vm_region; 1441 *region = *vma->vm_region;
1444 new->vm_region = region; 1442 new->vm_region = region;
1445 1443
1446 npages = (addr - vma->vm_start) >> PAGE_SHIFT; 1444 npages = (addr - vma->vm_start) >> PAGE_SHIFT;
1447 1445
1448 if (new_below) { 1446 if (new_below) {
1449 region->vm_top = region->vm_end = new->vm_end = addr; 1447 region->vm_top = region->vm_end = new->vm_end = addr;
1450 } else { 1448 } else {
1451 region->vm_start = new->vm_start = addr; 1449 region->vm_start = new->vm_start = addr;
1452 region->vm_pgoff = new->vm_pgoff += npages; 1450 region->vm_pgoff = new->vm_pgoff += npages;
1453 } 1451 }
1454 1452
1455 if (new->vm_ops && new->vm_ops->open) 1453 if (new->vm_ops && new->vm_ops->open)
1456 new->vm_ops->open(new); 1454 new->vm_ops->open(new);
1457 1455
1458 delete_vma_from_mm(vma); 1456 delete_vma_from_mm(vma);
1459 down_write(&nommu_region_sem); 1457 down_write(&nommu_region_sem);
1460 delete_nommu_region(vma->vm_region); 1458 delete_nommu_region(vma->vm_region);
1461 if (new_below) { 1459 if (new_below) {
1462 vma->vm_region->vm_start = vma->vm_start = addr; 1460 vma->vm_region->vm_start = vma->vm_start = addr;
1463 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 1461 vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
1464 } else { 1462 } else {
1465 vma->vm_region->vm_end = vma->vm_end = addr; 1463 vma->vm_region->vm_end = vma->vm_end = addr;
1466 vma->vm_region->vm_top = addr; 1464 vma->vm_region->vm_top = addr;
1467 } 1465 }
1468 add_nommu_region(vma->vm_region); 1466 add_nommu_region(vma->vm_region);
1469 add_nommu_region(new->vm_region); 1467 add_nommu_region(new->vm_region);
1470 up_write(&nommu_region_sem); 1468 up_write(&nommu_region_sem);
1471 add_vma_to_mm(mm, vma); 1469 add_vma_to_mm(mm, vma);
1472 add_vma_to_mm(mm, new); 1470 add_vma_to_mm(mm, new);
1473 return 0; 1471 return 0;
1474 } 1472 }
1475 1473
1476 /* 1474 /*
1477 * shrink a VMA by removing the specified chunk from either the beginning or 1475 * shrink a VMA by removing the specified chunk from either the beginning or
1478 * the end 1476 * the end
1479 */ 1477 */
1480 static int shrink_vma(struct mm_struct *mm, 1478 static int shrink_vma(struct mm_struct *mm,
1481 struct vm_area_struct *vma, 1479 struct vm_area_struct *vma,
1482 unsigned long from, unsigned long to) 1480 unsigned long from, unsigned long to)
1483 { 1481 {
1484 struct vm_region *region; 1482 struct vm_region *region;
1485 1483
1486 kenter(""); 1484 kenter("");
1487 1485
1488 /* adjust the VMA's pointers, which may reposition it in the MM's tree 1486 /* adjust the VMA's pointers, which may reposition it in the MM's tree
1489 * and list */ 1487 * and list */
1490 delete_vma_from_mm(vma); 1488 delete_vma_from_mm(vma);
1491 if (from > vma->vm_start) 1489 if (from > vma->vm_start)
1492 vma->vm_end = from; 1490 vma->vm_end = from;
1493 else 1491 else
1494 vma->vm_start = to; 1492 vma->vm_start = to;
1495 add_vma_to_mm(mm, vma); 1493 add_vma_to_mm(mm, vma);
1496 1494
1497 /* cut the backing region down to size */ 1495 /* cut the backing region down to size */
1498 region = vma->vm_region; 1496 region = vma->vm_region;
1499 BUG_ON(atomic_read(&region->vm_usage) != 1); 1497 BUG_ON(atomic_read(&region->vm_usage) != 1);
1500 1498
1501 down_write(&nommu_region_sem); 1499 down_write(&nommu_region_sem);
1502 delete_nommu_region(region); 1500 delete_nommu_region(region);
1503 if (from > region->vm_start) { 1501 if (from > region->vm_start) {
1504 to = region->vm_top; 1502 to = region->vm_top;
1505 region->vm_top = region->vm_end = from; 1503 region->vm_top = region->vm_end = from;
1506 } else { 1504 } else {
1507 region->vm_start = to; 1505 region->vm_start = to;
1508 } 1506 }
1509 add_nommu_region(region); 1507 add_nommu_region(region);
1510 up_write(&nommu_region_sem); 1508 up_write(&nommu_region_sem);
1511 1509
1512 free_page_series(from, to); 1510 free_page_series(from, to);
1513 return 0; 1511 return 0;
1514 } 1512 }
1515 1513
1516 /* 1514 /*
1517 * release a mapping 1515 * release a mapping
1518 * - under NOMMU conditions the chunk to be unmapped must be backed by a single 1516 * - under NOMMU conditions the chunk to be unmapped must be backed by a single
1519 * VMA, though it need not cover the whole VMA 1517 * VMA, though it need not cover the whole VMA
1520 */ 1518 */
1521 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) 1519 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1522 { 1520 {
1523 struct vm_area_struct *vma; 1521 struct vm_area_struct *vma;
1524 struct rb_node *rb; 1522 struct rb_node *rb;
1525 unsigned long end = start + len; 1523 unsigned long end = start + len;
1526 int ret; 1524 int ret;
1527 1525
1528 kenter(",%lx,%zx", start, len); 1526 kenter(",%lx,%zx", start, len);
1529 1527
1530 if (len == 0) 1528 if (len == 0)
1531 return -EINVAL; 1529 return -EINVAL;
1532 1530
1533 /* find the first potentially overlapping VMA */ 1531 /* find the first potentially overlapping VMA */
1534 vma = find_vma(mm, start); 1532 vma = find_vma(mm, start);
1535 if (!vma) { 1533 if (!vma) {
1536 static int limit = 0; 1534 static int limit = 0;
1537 if (limit < 5) { 1535 if (limit < 5) {
1538 printk(KERN_WARNING 1536 printk(KERN_WARNING
1539 "munmap of memory not mmapped by process %d" 1537 "munmap of memory not mmapped by process %d"
1540 " (%s): 0x%lx-0x%lx\n", 1538 " (%s): 0x%lx-0x%lx\n",
1541 current->pid, current->comm, 1539 current->pid, current->comm,
1542 start, start + len - 1); 1540 start, start + len - 1);
1543 limit++; 1541 limit++;
1544 } 1542 }
1545 return -EINVAL; 1543 return -EINVAL;
1546 } 1544 }
1547 1545
1548 /* we're allowed to split an anonymous VMA but not a file-backed one */ 1546 /* we're allowed to split an anonymous VMA but not a file-backed one */
1549 if (vma->vm_file) { 1547 if (vma->vm_file) {
1550 do { 1548 do {
1551 if (start > vma->vm_start) { 1549 if (start > vma->vm_start) {
1552 kleave(" = -EINVAL [miss]"); 1550 kleave(" = -EINVAL [miss]");
1553 return -EINVAL; 1551 return -EINVAL;
1554 } 1552 }
1555 if (end == vma->vm_end) 1553 if (end == vma->vm_end)
1556 goto erase_whole_vma; 1554 goto erase_whole_vma;
1557 rb = rb_next(&vma->vm_rb); 1555 rb = rb_next(&vma->vm_rb);
1558 vma = rb_entry(rb, struct vm_area_struct, vm_rb); 1556 vma = rb_entry(rb, struct vm_area_struct, vm_rb);
1559 } while (rb); 1557 } while (rb);
1560 kleave(" = -EINVAL [split file]"); 1558 kleave(" = -EINVAL [split file]");
1561 return -EINVAL; 1559 return -EINVAL;
1562 } else { 1560 } else {
1563 /* the chunk must be a subset of the VMA found */ 1561 /* the chunk must be a subset of the VMA found */
1564 if (start == vma->vm_start && end == vma->vm_end) 1562 if (start == vma->vm_start && end == vma->vm_end)
1565 goto erase_whole_vma; 1563 goto erase_whole_vma;
1566 if (start < vma->vm_start || end > vma->vm_end) { 1564 if (start < vma->vm_start || end > vma->vm_end) {
1567 kleave(" = -EINVAL [superset]"); 1565 kleave(" = -EINVAL [superset]");
1568 return -EINVAL; 1566 return -EINVAL;
1569 } 1567 }
1570 if (start & ~PAGE_MASK) { 1568 if (start & ~PAGE_MASK) {
1571 kleave(" = -EINVAL [unaligned start]"); 1569 kleave(" = -EINVAL [unaligned start]");
1572 return -EINVAL; 1570 return -EINVAL;
1573 } 1571 }
1574 if (end != vma->vm_end && end & ~PAGE_MASK) { 1572 if (end != vma->vm_end && end & ~PAGE_MASK) {
1575 kleave(" = -EINVAL [unaligned split]"); 1573 kleave(" = -EINVAL [unaligned split]");
1576 return -EINVAL; 1574 return -EINVAL;
1577 } 1575 }
1578 if (start != vma->vm_start && end != vma->vm_end) { 1576 if (start != vma->vm_start && end != vma->vm_end) {
1579 ret = split_vma(mm, vma, start, 1); 1577 ret = split_vma(mm, vma, start, 1);
1580 if (ret < 0) { 1578 if (ret < 0) {
1581 kleave(" = %d [split]", ret); 1579 kleave(" = %d [split]", ret);
1582 return ret; 1580 return ret;
1583 } 1581 }
1584 } 1582 }
1585 return shrink_vma(mm, vma, start, end); 1583 return shrink_vma(mm, vma, start, end);
1586 } 1584 }
1587 1585
1588 erase_whole_vma: 1586 erase_whole_vma:
1589 delete_vma_from_mm(vma); 1587 delete_vma_from_mm(vma);
1590 delete_vma(mm, vma); 1588 delete_vma(mm, vma);
1591 kleave(" = 0"); 1589 kleave(" = 0");
1592 return 0; 1590 return 0;
1593 } 1591 }
1594 EXPORT_SYMBOL(do_munmap); 1592 EXPORT_SYMBOL(do_munmap);
1595 1593
1596 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1594 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
1597 { 1595 {
1598 int ret; 1596 int ret;
1599 struct mm_struct *mm = current->mm; 1597 struct mm_struct *mm = current->mm;
1600 1598
1601 down_write(&mm->mmap_sem); 1599 down_write(&mm->mmap_sem);
1602 ret = do_munmap(mm, addr, len); 1600 ret = do_munmap(mm, addr, len);
1603 up_write(&mm->mmap_sem); 1601 up_write(&mm->mmap_sem);
1604 return ret; 1602 return ret;
1605 } 1603 }
1606 1604
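do_munmap() only lets the caller carve up anonymous VMAs; a file-backed mapping has to be released in one piece. A userspace sketch of that distinction, assuming a NOMMU kernel (an MMU kernel would honour the partial unmap of the file mapping) and using /etc/hostname purely as an example of a readable file:

/* illustrative munmap(2) calls; on a NOMMU kernel the partial unmap of a
 * file-backed mapping is refused (-EINVAL) while the anonymous one is
 * trimmed via shrink_vma() */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t pg = 4096;

	/* anonymous: unmapping the tail is allowed (shrink_vma) */
	char *anon = mmap(NULL, 4 * pg, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon != MAP_FAILED && munmap(anon + 2 * pg, 2 * pg) == 0)
		printf("anonymous tail unmapped\n");

	/* file-backed: only a whole-VMA unmap is accepted */
	int fd = open("/etc/hostname", O_RDONLY);	/* example path only */
	char *filemap = fd < 0 ? MAP_FAILED :
		mmap(NULL, 2 * pg, PROT_READ, MAP_PRIVATE, fd, 0);
	if (filemap != MAP_FAILED) {
		if (munmap(filemap, pg) < 0)		/* NOMMU: EINVAL */
			perror("partial unmap of file mapping");
		munmap(filemap, 2 * pg);		/* whole mapping: OK */
	}
	if (fd >= 0)
		close(fd);
	return 0;
}
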
1607 /* 1605 /*
1608 * release all the mappings made in a process's VM space 1606 * release all the mappings made in a process's VM space
1609 */ 1607 */
1610 void exit_mmap(struct mm_struct *mm) 1608 void exit_mmap(struct mm_struct *mm)
1611 { 1609 {
1612 struct vm_area_struct *vma; 1610 struct vm_area_struct *vma;
1613 1611
1614 if (!mm) 1612 if (!mm)
1615 return; 1613 return;
1616 1614
1617 kenter(""); 1615 kenter("");
1618 1616
1619 mm->total_vm = 0; 1617 mm->total_vm = 0;
1620 1618
1621 while ((vma = mm->mmap)) { 1619 while ((vma = mm->mmap)) {
1622 mm->mmap = vma->vm_next; 1620 mm->mmap = vma->vm_next;
1623 delete_vma_from_mm(vma); 1621 delete_vma_from_mm(vma);
1624 delete_vma(mm, vma); 1622 delete_vma(mm, vma);
1625 } 1623 }
1626 1624
1627 kleave(""); 1625 kleave("");
1628 } 1626 }
1629 1627
1630 unsigned long do_brk(unsigned long addr, unsigned long len) 1628 unsigned long do_brk(unsigned long addr, unsigned long len)
1631 { 1629 {
1632 return -ENOMEM; 1630 return -ENOMEM;
1633 } 1631 }
1634 1632
1635 /* 1633 /*
1636 * expand (or shrink) an existing mapping, potentially moving it at the same 1634 * expand (or shrink) an existing mapping, potentially moving it at the same
1637 * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 1635 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
1638 * 1636 *
1639 * under NOMMU conditions, we only permit changing a mapping's size, and only 1637 * under NOMMU conditions, we only permit changing a mapping's size, and only
1640 * as long as it stays within the region allocated by do_mmap_private() and the 1638 * as long as it stays within the region allocated by do_mmap_private() and the
1641 * block is not shareable 1639 * block is not shareable
1642 * 1640 *
1643 * MREMAP_FIXED is not supported under NOMMU conditions 1641 * MREMAP_FIXED is not supported under NOMMU conditions
1644 */ 1642 */
1645 unsigned long do_mremap(unsigned long addr, 1643 unsigned long do_mremap(unsigned long addr,
1646 unsigned long old_len, unsigned long new_len, 1644 unsigned long old_len, unsigned long new_len,
1647 unsigned long flags, unsigned long new_addr) 1645 unsigned long flags, unsigned long new_addr)
1648 { 1646 {
1649 struct vm_area_struct *vma; 1647 struct vm_area_struct *vma;
1650 1648
1651 /* insanity checks first */ 1649 /* insanity checks first */
1652 if (old_len == 0 || new_len == 0) 1650 if (old_len == 0 || new_len == 0)
1653 return (unsigned long) -EINVAL; 1651 return (unsigned long) -EINVAL;
1654 1652
1655 if (addr & ~PAGE_MASK) 1653 if (addr & ~PAGE_MASK)
1656 return -EINVAL; 1654 return -EINVAL;
1657 1655
1658 if (flags & MREMAP_FIXED && new_addr != addr) 1656 if (flags & MREMAP_FIXED && new_addr != addr)
1659 return (unsigned long) -EINVAL; 1657 return (unsigned long) -EINVAL;
1660 1658
1661 vma = find_vma_exact(current->mm, addr, old_len); 1659 vma = find_vma_exact(current->mm, addr, old_len);
1662 if (!vma) 1660 if (!vma)
1663 return (unsigned long) -EINVAL; 1661 return (unsigned long) -EINVAL;
1664 1662
1665 if (vma->vm_end != vma->vm_start + old_len) 1663 if (vma->vm_end != vma->vm_start + old_len)
1666 return (unsigned long) -EFAULT; 1664 return (unsigned long) -EFAULT;
1667 1665
1668 if (vma->vm_flags & VM_MAYSHARE) 1666 if (vma->vm_flags & VM_MAYSHARE)
1669 return (unsigned long) -EPERM; 1667 return (unsigned long) -EPERM;
1670 1668
1671 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 1669 if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
1672 return (unsigned long) -ENOMEM; 1670 return (unsigned long) -ENOMEM;
1673 1671
1674 /* all checks complete - do it */ 1672 /* all checks complete - do it */
1675 vma->vm_end = vma->vm_start + new_len; 1673 vma->vm_end = vma->vm_start + new_len;
1676 return vma->vm_start; 1674 return vma->vm_start;
1677 } 1675 }
1678 EXPORT_SYMBOL(do_mremap); 1676 EXPORT_SYMBOL(do_mremap);
1679 1677
1680 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 1678 SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
1681 unsigned long, new_len, unsigned long, flags, 1679 unsigned long, new_len, unsigned long, flags,
1682 unsigned long, new_addr) 1680 unsigned long, new_addr)
1683 { 1681 {
1684 unsigned long ret; 1682 unsigned long ret;
1685 1683
1686 down_write(&current->mm->mmap_sem); 1684 down_write(&current->mm->mmap_sem);
1687 ret = do_mremap(addr, old_len, new_len, flags, new_addr); 1685 ret = do_mremap(addr, old_len, new_len, flags, new_addr);
1688 up_write(&current->mm->mmap_sem); 1686 up_write(&current->mm->mmap_sem);
1689 return ret; 1687 return ret;
1690 } 1688 }
1691 1689
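As the comment above spells out, mremap() here can only change a mapping's size in place, within the block do_mmap_private() set aside, and never for a shareable mapping. An illustrative userspace caller, again assuming a NOMMU kernel (on an MMU kernel the second call would simply move and grow the mapping):

/* illustrative mremap(2) calls; the behaviour noted in the comments is the
 * NOMMU path implemented above */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t pg = 4096;

	char *p = mmap(NULL, 4 * pg, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* shrinking in place is permitted */
	if (mremap(p, 4 * pg, 2 * pg, 0) == MAP_FAILED)
		perror("shrink");

	/* growing past the backing region fails with ENOMEM, even with
	 * MREMAP_MAYMOVE, because NOMMU mappings are never relocated */
	void *q = mremap(p, 2 * pg, 16 * pg, MREMAP_MAYMOVE);
	if (q == MAP_FAILED)
		perror("grow");
	else
		p = q;		/* an MMU kernel may have moved the mapping */

	munmap(p, 2 * pg);
	return 0;
}
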
1692 struct page *follow_page(struct vm_area_struct *vma, unsigned long address, 1690 struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
1693 unsigned int foll_flags) 1691 unsigned int foll_flags)
1694 { 1692 {
1695 return NULL; 1693 return NULL;
1696 } 1694 }
1697 1695
1698 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, 1696 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
1699 unsigned long to, unsigned long size, pgprot_t prot) 1697 unsigned long to, unsigned long size, pgprot_t prot)
1700 { 1698 {
1701 vma->vm_start = vma->vm_pgoff << PAGE_SHIFT; 1699 vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
1702 return 0; 1700 return 0;
1703 } 1701 }
1704 EXPORT_SYMBOL(remap_pfn_range); 1702 EXPORT_SYMBOL(remap_pfn_range);
1705 1703
1706 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1704 int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
1707 unsigned long pgoff) 1705 unsigned long pgoff)
1708 { 1706 {
1709 unsigned int size = vma->vm_end - vma->vm_start; 1707 unsigned int size = vma->vm_end - vma->vm_start;
1710 1708
1711 if (!(vma->vm_flags & VM_USERMAP)) 1709 if (!(vma->vm_flags & VM_USERMAP))
1712 return -EINVAL; 1710 return -EINVAL;
1713 1711
1714 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1712 vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT));
1715 vma->vm_end = vma->vm_start + size; 1713 vma->vm_end = vma->vm_start + size;
1716 1714
1717 return 0; 1715 return 0;
1718 } 1716 }
1719 EXPORT_SYMBOL(remap_vmalloc_range); 1717 EXPORT_SYMBOL(remap_vmalloc_range);
1720 1718
1721 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) 1719 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1722 { 1720 {
1723 } 1721 }
1724 1722
1725 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, 1723 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1726 unsigned long len, unsigned long pgoff, unsigned long flags) 1724 unsigned long len, unsigned long pgoff, unsigned long flags)
1727 { 1725 {
1728 return -ENOMEM; 1726 return -ENOMEM;
1729 } 1727 }
1730 1728
1731 void arch_unmap_area(struct mm_struct *mm, unsigned long addr) 1729 void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1732 { 1730 {
1733 } 1731 }
1734 1732
1735 void unmap_mapping_range(struct address_space *mapping, 1733 void unmap_mapping_range(struct address_space *mapping,
1736 loff_t const holebegin, loff_t const holelen, 1734 loff_t const holebegin, loff_t const holelen,
1737 int even_cows) 1735 int even_cows)
1738 { 1736 {
1739 } 1737 }
1740 EXPORT_SYMBOL(unmap_mapping_range); 1738 EXPORT_SYMBOL(unmap_mapping_range);
1741 1739
1742 /* 1740 /*
1743 * ask for an unmapped area at which to create a mapping on a file 1741 * ask for an unmapped area at which to create a mapping on a file
1744 */ 1742 */
1745 unsigned long get_unmapped_area(struct file *file, unsigned long addr, 1743 unsigned long get_unmapped_area(struct file *file, unsigned long addr,
1746 unsigned long len, unsigned long pgoff, 1744 unsigned long len, unsigned long pgoff,
1747 unsigned long flags) 1745 unsigned long flags)
1748 { 1746 {
1749 unsigned long (*get_area)(struct file *, unsigned long, unsigned long, 1747 unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
1750 unsigned long, unsigned long); 1748 unsigned long, unsigned long);
1751 1749
1752 get_area = current->mm->get_unmapped_area; 1750 get_area = current->mm->get_unmapped_area;
1753 if (file && file->f_op && file->f_op->get_unmapped_area) 1751 if (file && file->f_op && file->f_op->get_unmapped_area)
1754 get_area = file->f_op->get_unmapped_area; 1752 get_area = file->f_op->get_unmapped_area;
1755 1753
1756 if (!get_area) 1754 if (!get_area)
1757 return -ENOSYS; 1755 return -ENOSYS;
1758 1756
1759 return get_area(file, addr, len, pgoff, flags); 1757 return get_area(file, addr, len, pgoff, flags);
1760 } 1758 }
1761 EXPORT_SYMBOL(get_unmapped_area); 1759 EXPORT_SYMBOL(get_unmapped_area);
1762 1760
1763 /* 1761 /*
1764 * Check that a process has enough memory to allocate a new virtual 1762 * Check that a process has enough memory to allocate a new virtual
1765 * mapping. 0 means there is enough memory for the allocation to 1763 * mapping. 0 means there is enough memory for the allocation to
1766 * succeed and -ENOMEM implies there is not. 1764 * succeed and -ENOMEM implies there is not.
1767 * 1765 *
1768 * We currently support three overcommit policies, which are set via the 1766 * We currently support three overcommit policies, which are set via the
1769 * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting 1767 * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
1770 * 1768 *
1771 * Strict overcommit modes added 2002 Feb 26 by Alan Cox. 1769 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
1772 * Additional code 2002 Jul 20 by Robert Love. 1770 * Additional code 2002 Jul 20 by Robert Love.
1773 * 1771 *
1774 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. 1772 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
1775 * 1773 *
1776 * Note this is a helper function intended to be used by LSMs which 1774 * Note this is a helper function intended to be used by LSMs which
1777 * wish to use this logic. 1775 * wish to use this logic.
1778 */ 1776 */
1779 int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) 1777 int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
1780 { 1778 {
1781 unsigned long free, allowed; 1779 unsigned long free, allowed;
1782 1780
1783 vm_acct_memory(pages); 1781 vm_acct_memory(pages);
1784 1782
1785 /* 1783 /*
1786 * Sometimes we want to use more memory than we have 1784 * Sometimes we want to use more memory than we have
1787 */ 1785 */
1788 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) 1786 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1789 return 0; 1787 return 0;
1790 1788
1791 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 1789 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1792 unsigned long n; 1790 unsigned long n;
1793 1791
1794 free = global_page_state(NR_FILE_PAGES); 1792 free = global_page_state(NR_FILE_PAGES);
1795 free += nr_swap_pages; 1793 free += nr_swap_pages;
1796 1794
1797 /* 1795 /*
1798 * Any slabs which are created with the 1796 * Any slabs which are created with the
1799 * SLAB_RECLAIM_ACCOUNT flag claim to have contents 1797 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1800 * which are reclaimable, under pressure. The dentry 1798 * which are reclaimable, under pressure. The dentry
1801 * cache and most inode caches should fall into this 1799 * cache and most inode caches should fall into this
1802 */ 1800 */
1803 free += global_page_state(NR_SLAB_RECLAIMABLE); 1801 free += global_page_state(NR_SLAB_RECLAIMABLE);
1804 1802
1805 /* 1803 /*
1806 * Leave the last 3% for root 1804 * Leave the last 3% for root
1807 */ 1805 */
1808 if (!cap_sys_admin) 1806 if (!cap_sys_admin)
1809 free -= free / 32; 1807 free -= free / 32;
1810 1808
1811 if (free > pages) 1809 if (free > pages)
1812 return 0; 1810 return 0;
1813 1811
1814 /* 1812 /*
1815 * nr_free_pages() is very expensive on large systems, 1813 * nr_free_pages() is very expensive on large systems,
1816 * only call if we're about to fail. 1814 * only call if we're about to fail.
1817 */ 1815 */
1818 n = nr_free_pages(); 1816 n = nr_free_pages();
1819 1817
1820 /* 1818 /*
1821 * Leave reserved pages. The pages are not for anonymous pages. 1819 * Leave reserved pages. The pages are not for anonymous pages.
1822 */ 1820 */
1823 if (n <= totalreserve_pages) 1821 if (n <= totalreserve_pages)
1824 goto error; 1822 goto error;
1825 else 1823 else
1826 n -= totalreserve_pages; 1824 n -= totalreserve_pages;
1827 1825
1828 /* 1826 /*
1829 * Leave the last 3% for root 1827 * Leave the last 3% for root
1830 */ 1828 */
1831 if (!cap_sys_admin) 1829 if (!cap_sys_admin)
1832 n -= n / 32; 1830 n -= n / 32;
1833 free += n; 1831 free += n;
1834 1832
1835 if (free > pages) 1833 if (free > pages)
1836 return 0; 1834 return 0;
1837 1835
1838 goto error; 1836 goto error;
1839 } 1837 }
1840 1838
1841 allowed = totalram_pages * sysctl_overcommit_ratio / 100; 1839 allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1842 /* 1840 /*
1843 * Leave the last 3% for root 1841 * Leave the last 3% for root
1844 */ 1842 */
1845 if (!cap_sys_admin) 1843 if (!cap_sys_admin)
1846 allowed -= allowed / 32; 1844 allowed -= allowed / 32;
1847 allowed += total_swap_pages; 1845 allowed += total_swap_pages;
1848 1846
1849 /* Don't let a single process grow too big: 1847 /* Don't let a single process grow too big:
1850 leave 3% of the size of this process for other processes */ 1848 leave 3% of the size of this process for other processes */
1851 if (mm) 1849 if (mm)
1852 allowed -= mm->total_vm / 32; 1850 allowed -= mm->total_vm / 32;
1853 1851
1854 if (percpu_counter_read_positive(&vm_committed_as) < allowed) 1852 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
1855 return 0; 1853 return 0;
1856 1854
1857 error: 1855 error:
1858 vm_unacct_memory(pages); 1856 vm_unacct_memory(pages);
1859 1857
1860 return -ENOMEM; 1858 return -ENOMEM;
1861 } 1859 }
1862 1860
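With vm.overcommit_memory set to OVERCOMMIT_NEVER, the tail of __vm_enough_memory() reduces to a fixed commit limit. A standalone sketch of that arithmetic with invented page counts, assuming 4 KiB pages, a caller without CAP_SYS_ADMIN, and the default overcommit ratio of 50:

/* standalone model of the OVERCOMMIT_NEVER limit computed above; the page
 * counts are invented for illustration */
#include <stdio.h>

int main(void)
{
	unsigned long totalram_pages = 16384;	/* 64 MiB of RAM, 4 KiB pages */
	unsigned long total_swap_pages = 0;	/* typical for a NOMMU target */
	unsigned long overcommit_ratio = 50;	/* vm.overcommit_ratio default */
	unsigned long process_total_vm = 2048;	/* pages already mapped by caller */

	unsigned long allowed = totalram_pages * overcommit_ratio / 100;
	allowed -= allowed / 32;		/* leave the last ~3% for root */
	allowed += total_swap_pages;
	allowed -= process_total_vm / 32;	/* keep ~3% of this process for others */

	printf("commit limit: %lu pages (%lu KiB)\n", allowed, allowed * 4);
	return 0;
}
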
1863 int in_gate_area_no_task(unsigned long addr) 1861 int in_gate_area_no_task(unsigned long addr)
1864 { 1862 {
1865 return 0; 1863 return 0;
1866 } 1864 }
1867 1865
1868 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1866 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1869 { 1867 {
1870 BUG(); 1868 BUG();
1871 return 0; 1869 return 0;
1872 } 1870 }
1873 EXPORT_SYMBOL(filemap_fault); 1871 EXPORT_SYMBOL(filemap_fault);
1874 1872
1875 /* 1873 /*
1876 * Access another process' address space. 1874 * Access another process' address space.
1877 * - source/target buffer must be kernel space 1875 * - source/target buffer must be kernel space
1878 */ 1876 */
1879 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) 1877 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
1880 { 1878 {
1881 struct vm_area_struct *vma; 1879 struct vm_area_struct *vma;
1882 struct mm_struct *mm; 1880 struct mm_struct *mm;
1883 1881
1884 if (addr + len < addr) 1882 if (addr + len < addr)
1885 return 0; 1883 return 0;
1886 1884
1887 mm = get_task_mm(tsk); 1885 mm = get_task_mm(tsk);
1888 if (!mm) 1886 if (!mm)
1889 return 0; 1887 return 0;
1890 1888
1891 down_read(&mm->mmap_sem); 1889 down_read(&mm->mmap_sem);
1892 1890
1893 /* the access must start within one of the target process's mappings */ 1891 /* the access must start within one of the target process's mappings */
1894 vma = find_vma(mm, addr); 1892 vma = find_vma(mm, addr);
1895 if (vma) { 1893 if (vma) {
1896 /* don't overrun this mapping */ 1894 /* don't overrun this mapping */
1897 if (addr + len >= vma->vm_end) 1895 if (addr + len >= vma->vm_end)
1898 len = vma->vm_end - addr; 1896 len = vma->vm_end - addr;
1899 1897
1900 /* only read or write mappings where it is permitted */ 1898 /* only read or write mappings where it is permitted */
1901 if (write && vma->vm_flags & VM_MAYWRITE) 1899 if (write && vma->vm_flags & VM_MAYWRITE)
1902 len -= copy_to_user((void *) addr, buf, len); 1900 len -= copy_to_user((void *) addr, buf, len);
1903 else if (!write && vma->vm_flags & VM_MAYREAD) 1901 else if (!write && vma->vm_flags & VM_MAYREAD)
1904 len -= copy_from_user(buf, (void *) addr, len); 1902 len -= copy_from_user(buf, (void *) addr, len);
1905 else 1903 else
1906 len = 0; 1904 len = 0;
1907 } else { 1905 } else {
1908 len = 0; 1906 len = 0;
1909 } 1907 }
1910 1908
1911 up_read(&mm->mmap_sem); 1909 up_read(&mm->mmap_sem);
1912 mmput(mm); 1910 mmput(mm);
1913 return len; 1911 return len;
1914 } 1912 }
1915 1913