Commit c40f6f8bbc4cbd2902671aacd587400ddca62627

Authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu

* git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu:
  NOMMU: Support XIP on initramfs
  NOMMU: Teach kobjsize() about VMA regions.
  FLAT: Don't attempt to expand the userspace stack to fill the space allocated
  FDPIC: Don't attempt to expand the userspace stack to fill the space allocated
  NOMMU: Improve procfs output using per-MM VMAs
  NOMMU: Make mmap allocation page trimming behaviour configurable.
  NOMMU: Make VMAs per MM as for MMU-mode linux
  NOMMU: Delete askedalloc and realalloc variables
  NOMMU: Rename ARM's struct vm_region
  NOMMU: Fix cleanup handling in ramfs_nommu_get_umapped_area()

Showing 29 changed files

Documentation/nommu-mmap.txt
... ... @@ -109,13 +109,19 @@
109 109 FURTHER NOTES ON NO-MMU MMAP
110 110 ============================
111 111  
112   - (*) A request for a private mapping of less than a page in size may not return
113   - a page-aligned buffer. This is because the kernel calls kmalloc() to
114   - allocate the buffer, not get_free_page().
  112 + (*) A request for a private mapping of a file may return a buffer that is not
  113 + page-aligned. This is because XIP may take place, and the data may not be
  114 + page-aligned in the backing store.
115 115  
116   - (*) A list of all the mappings on the system is visible through /proc/maps in
117   - no-MMU mode.
  116 + (*) A request for an anonymous mapping will always be page aligned. If
  117 + possible, the size of the request should be a power of two; otherwise some
  118 + of the space may be wasted, as the kernel must allocate a power-of-2
  119 + granule but will only discard the excess if appropriately configured (as
  120 + this has an effect on fragmentation).
118 121  
  122 + (*) A list of all the private copy and anonymous mappings on the system is
  123 + visible through /proc/maps in no-MMU mode.
  124 +
119 125 (*) A list of all the mappings in use by a process is visible through
120 126 /proc/<pid>/maps in no-MMU mode.
121 127  
... ... @@ -242,4 +248,19 @@
242 248 Provision of shared mappings on block device files is exactly the same as for
243 249 character devices. If there isn't a real device underneath, then the driver
244 250 should allocate sufficient contiguous memory to honour any supported mapping.
  251 +
  252 +
  253 +=================================
  254 +ADJUSTING PAGE TRIMMING BEHAVIOUR
  255 +=================================
  256 +
  257 +NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
  258 +when performing an allocation. This can have adverse effects on memory
  259 +fragmentation, and as such, is left configurable. The default behaviour is to
  260 +aggressively trim allocations and discard any excess pages back into the page
  261 +allocator. In order to retain finer-grained control over fragmentation, this
  262 +behaviour can either be disabled completely, or bumped up to a higher page
  263 +watermark where trimming begins.
  264 +
  265 +Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
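
[Editorial illustration, not part of this commit.] The advice above can be
followed from userspace by sizing anonymous mappings as a power-of-2 number of
pages, so the power-of-2 granule the kernel allocates is used in full and
nothing needs trimming. A minimal sketch using the standard mmap(2) interface
(the sizes and names here are arbitrary examples):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);
        size_t len = 4 * page;      /* 4 pages: a power-of-2 granule */
        void *p;

        /* anonymous private mapping; on no-MMU this is always page aligned */
        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        printf("mapped %zu bytes at %p\n", len, p);
        munmap(p, len);
        return 0;
    }
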
Documentation/sysctl/vm.txt
... ... @@ -38,6 +38,7 @@
38 38 - numa_zonelist_order
39 39 - nr_hugepages
40 40 - nr_overcommit_hugepages
  41 +- nr_trim_pages (only if CONFIG_MMU=n)
41 42  
42 43 ==============================================================
43 44  
... ... @@ -348,4 +349,21 @@
348 349 nr_hugepages + nr_overcommit_hugepages.
349 350  
350 351 See Documentation/vm/hugetlbpage.txt
  352 +
  353 +==============================================================
  354 +
  355 +nr_trim_pages
  356 +
  357 +This is available only on NOMMU kernels.
  358 +
  359 +This value adjusts the excess page trimming behaviour of power-of-2 aligned
  360 +NOMMU mmap allocations.
  361 +
  362 +A value of 0 disables trimming of allocations entirely, while a value of 1
  363 +trims excess pages aggressively. Any value >= 1 acts as the watermark where
  364 +trimming of allocations is initiated.
  365 +
  366 +The default value is 1.
  367 +
  368 +See Documentation/nommu-mmap.txt for more information.
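
[Editorial illustration, not part of this commit.] Assuming the usual
sysctl-to-procfs mapping, vm.nr_trim_pages is exposed as
/proc/sys/vm/nr_trim_pages and can be changed at run time, for example to
disable trimming entirely. A minimal C sketch:

    #include <stdio.h>

    int main(void)
    {
        /* 0 = never trim excess pages; 1 (the default) = trim aggressively */
        FILE *f = fopen("/proc/sys/vm/nr_trim_pages", "w");

        if (!f) {
            perror("nr_trim_pages");
            return 1;
        }
        fprintf(f, "0\n");
        return fclose(f) ? 1 : 0;
    }

The same effect can be had from a boot script by writing the value directly to
that procfs file.
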
arch/arm/include/asm/mmu.h
... ... @@ -24,7 +24,6 @@
24 24 * modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
25 25 */
26 26 typedef struct {
27   - struct vm_list_struct *vmlist;
28 27 unsigned long end_brk;
29 28 } mm_context_t;
30 29  
arch/arm/mm/dma-mapping.c
... ... @@ -71,7 +71,7 @@
71 71 * the amount of RAM found at boot time.) I would imagine that get_vm_area()
72 72 * would have to initialise this each time prior to calling vm_region_alloc().
73 73 */
74   -struct vm_region {
  74 +struct arm_vm_region {
75 75 struct list_head vm_list;
76 76 unsigned long vm_start;
77 77 unsigned long vm_end;
... ... @@ -79,20 +79,20 @@
79 79 int vm_active;
80 80 };
81 81  
82   -static struct vm_region consistent_head = {
  82 +static struct arm_vm_region consistent_head = {
83 83 .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
84 84 .vm_start = CONSISTENT_BASE,
85 85 .vm_end = CONSISTENT_END,
86 86 };
87 87  
88   -static struct vm_region *
89   -vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
  88 +static struct arm_vm_region *
  89 +arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp)
90 90 {
91 91 unsigned long addr = head->vm_start, end = head->vm_end - size;
92 92 unsigned long flags;
93   - struct vm_region *c, *new;
  93 + struct arm_vm_region *c, *new;
94 94  
95   - new = kmalloc(sizeof(struct vm_region), gfp);
  95 + new = kmalloc(sizeof(struct arm_vm_region), gfp);
96 96 if (!new)
97 97 goto out;
98 98  
99 99  
... ... @@ -127,9 +127,9 @@
127 127 return NULL;
128 128 }
129 129  
130   -static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
  130 +static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr)
131 131 {
132   - struct vm_region *c;
  132 + struct arm_vm_region *c;
133 133  
134 134 list_for_each_entry(c, &head->vm_list, vm_list) {
135 135 if (c->vm_active && c->vm_start == addr)
... ... @@ -149,7 +149,7 @@
149 149 pgprot_t prot)
150 150 {
151 151 struct page *page;
152   - struct vm_region *c;
  152 + struct arm_vm_region *c;
153 153 unsigned long order;
154 154 u64 mask = ISA_DMA_THRESHOLD, limit;
155 155  
... ... @@ -214,7 +214,7 @@
214 214 /*
215 215 * Allocate a virtual address in the consistent mapping region.
216 216 */
217   - c = vm_region_alloc(&consistent_head, size,
  217 + c = arm_vm_region_alloc(&consistent_head, size,
218 218 gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
219 219 if (c) {
220 220 pte_t *pte;
221 221  
... ... @@ -311,13 +311,13 @@
311 311 void *cpu_addr, dma_addr_t dma_addr, size_t size)
312 312 {
313 313 unsigned long flags, user_size, kern_size;
314   - struct vm_region *c;
  314 + struct arm_vm_region *c;
315 315 int ret = -ENXIO;
316 316  
317 317 user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
318 318  
319 319 spin_lock_irqsave(&consistent_lock, flags);
320   - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
  320 + c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
321 321 spin_unlock_irqrestore(&consistent_lock, flags);
322 322  
323 323 if (c) {
... ... @@ -359,7 +359,7 @@
359 359 */
360 360 void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
361 361 {
362   - struct vm_region *c;
  362 + struct arm_vm_region *c;
363 363 unsigned long flags, addr;
364 364 pte_t *ptep;
365 365 int idx;
... ... @@ -378,7 +378,7 @@
378 378 size = PAGE_ALIGN(size);
379 379  
380 380 spin_lock_irqsave(&consistent_lock, flags);
381   - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
  381 + c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
382 382 if (!c)
383 383 goto no_area;
384 384  
arch/blackfin/include/asm/mmu.h
... ... @@ -10,7 +10,6 @@
10 10 };
11 11  
12 12 typedef struct {
13   - struct vm_list_struct *vmlist;
14 13 unsigned long end_brk;
15 14 unsigned long stack_start;
16 15  
arch/blackfin/kernel/ptrace.c
... ... @@ -160,15 +160,15 @@
160 160 static inline int is_user_addr_valid(struct task_struct *child,
161 161 unsigned long start, unsigned long len)
162 162 {
163   - struct vm_list_struct *vml;
  163 + struct vm_area_struct *vma;
164 164 struct sram_list_struct *sraml;
165 165  
166 166 /* overflow */
167 167 if (start + len < start)
168 168 return -EIO;
169 169  
170   - for (vml = child->mm->context.vmlist; vml; vml = vml->next)
171   - if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end)
  170 + vma = find_vma(child->mm, start);
  171 + if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
172 172 return 0;
173 173  
174 174 for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
arch/blackfin/kernel/traps.c
... ... @@ -32,6 +32,7 @@
32 32 #include <linux/module.h>
33 33 #include <linux/kallsyms.h>
34 34 #include <linux/fs.h>
  35 +#include <linux/rbtree.h>
35 36 #include <asm/traps.h>
36 37 #include <asm/cacheflush.h>
37 38 #include <asm/cplb.h>
... ... @@ -83,6 +84,7 @@
83 84 struct mm_struct *mm;
84 85 unsigned long flags, offset;
85 86 unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
  87 + struct rb_node *n;
86 88  
87 89 #ifdef CONFIG_KALLSYMS
88 90 unsigned long symsize;
89 91  
... ... @@ -128,10 +130,11 @@
128 130 if (!mm)
129 131 continue;
130 132  
131   - vml = mm->context.vmlist;
132   - while (vml) {
133   - struct vm_area_struct *vma = vml->vma;
  133 + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
  134 + struct vm_area_struct *vma;
134 135  
  136 + vma = rb_entry(n, struct vm_area_struct, vm_rb);
  137 +
135 138 if (address >= vma->vm_start && address < vma->vm_end) {
136 139 char _tmpbuf[256];
137 140 char *name = p->comm;
... ... @@ -176,8 +179,6 @@
176 179  
177 180 goto done;
178 181 }
179   -
180   - vml = vml->next;
181 182 }
182 183 if (!in_atomic)
183 184 mmput(mm);
arch/frv/kernel/ptrace.c
... ... @@ -69,7 +69,8 @@
69 69 }
70 70  
71 71 /*
72   - * check that an address falls within the bounds of the target process's memory mappings
  72 + * check that an address falls within the bounds of the target process's memory
  73 + * mappings
73 74 */
74 75 static inline int is_user_addr_valid(struct task_struct *child,
75 76 unsigned long start, unsigned long len)
76 77  
... ... @@ -79,11 +80,11 @@
79 80 return -EIO;
80 81 return 0;
81 82 #else
82   - struct vm_list_struct *vml;
  83 + struct vm_area_struct *vma;
83 84  
84   - for (vml = child->mm->context.vmlist; vml; vml = vml->next)
85   - if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end)
86   - return 0;
  85 + vma = find_vma(child->mm, start);
  86 + if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
  87 + return 0;
87 88  
88 89 return -EIO;
89 90 #endif
arch/h8300/include/asm/mmu.h
... ... @@ -4,7 +4,6 @@
4 4 /* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
5 5  
6 6 typedef struct {
7   - struct vm_list_struct *vmlist;
8 7 unsigned long end_brk;
9 8 } mm_context_t;
10 9  
arch/m68knommu/include/asm/mmu.h
... ... @@ -4,7 +4,6 @@
4 4 /* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
5 5  
6 6 typedef struct {
7   - struct vm_list_struct *vmlist;
8 7 unsigned long end_brk;
9 8 } mm_context_t;
10 9  
arch/sh/include/asm/mmu.h
... ... @@ -9,7 +9,6 @@
9 9 mm_context_id_t id;
10 10 void *vdso;
11 11 #else
12   - struct vm_list_struct *vmlist;
13 12 unsigned long end_brk;
14 13 #endif
15 14 #ifdef CONFIG_BINFMT_ELF_FDPIC
fs/binfmt_elf_fdpic.c
... ... @@ -168,9 +168,6 @@
168 168 struct elf_fdpic_params exec_params, interp_params;
169 169 struct elf_phdr *phdr;
170 170 unsigned long stack_size, entryaddr;
171   -#ifndef CONFIG_MMU
172   - unsigned long fullsize;
173   -#endif
174 171 #ifdef ELF_FDPIC_PLAT_INIT
175 172 unsigned long dynaddr;
176 173 #endif
... ... @@ -390,11 +387,6 @@
390 387 goto error_kill;
391 388 }
392 389  
393   - /* expand the stack mapping to use up the entire allocation granule */
394   - fullsize = kobjsize((char *) current->mm->start_brk);
395   - if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
396   - fullsize, 0, 0)))
397   - stack_size = fullsize;
398 390 up_write(&current->mm->mmap_sem);
399 391  
400 392 current->mm->brk = current->mm->start_brk;
401 393  
... ... @@ -1567,11 +1559,9 @@
1567 1559 static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1568 1560 unsigned long *limit, unsigned long mm_flags)
1569 1561 {
1570   - struct vm_list_struct *vml;
  1562 + struct vm_area_struct *vma;
1571 1563  
1572   - for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
1573   - struct vm_area_struct *vma = vml->vma;
1574   -
  1564 + for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1575 1565 if (!maydump(vma, mm_flags))
1576 1566 continue;
1577 1567  
... ... @@ -1617,9 +1607,6 @@
1617 1607 elf_fpxregset_t *xfpu = NULL;
1618 1608 #endif
1619 1609 int thread_status_size = 0;
1620   -#ifndef CONFIG_MMU
1621   - struct vm_list_struct *vml;
1622   -#endif
1623 1610 elf_addr_t *auxv;
1624 1611 unsigned long mm_flags;
1625 1612  
1626 1613  
... ... @@ -1685,13 +1672,7 @@
1685 1672 fill_prstatus(prstatus, current, signr);
1686 1673 elf_core_copy_regs(&prstatus->pr_reg, regs);
1687 1674  
1688   -#ifdef CONFIG_MMU
1689 1675 segs = current->mm->map_count;
1690   -#else
1691   - segs = 0;
1692   - for (vml = current->mm->context.vmlist; vml; vml = vml->next)
1693   - segs++;
1694   -#endif
1695 1676 #ifdef ELF_CORE_EXTRA_PHDRS
1696 1677 segs += ELF_CORE_EXTRA_PHDRS;
1697 1678 #endif
1698 1679  
... ... @@ -1766,19 +1747,9 @@
1766 1747 mm_flags = current->mm->flags;
1767 1748  
1768 1749 /* write program headers for segments dump */
1769   - for (
1770   -#ifdef CONFIG_MMU
1771   - vma = current->mm->mmap; vma; vma = vma->vm_next
1772   -#else
1773   - vml = current->mm->context.vmlist; vml; vml = vml->next
1774   -#endif
1775   - ) {
  1750 + for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1776 1751 struct elf_phdr phdr;
1777 1752 size_t sz;
1778   -
1779   -#ifndef CONFIG_MMU
1780   - vma = vml->vma;
1781   -#endif
1782 1753  
1783 1754 sz = vma->vm_end - vma->vm_start;
1784 1755  
... ... @@ -417,8 +417,8 @@
417 417 unsigned long textpos = 0, datapos = 0, result;
418 418 unsigned long realdatastart = 0;
419 419 unsigned long text_len, data_len, bss_len, stack_len, flags;
420   - unsigned long len, reallen, memp = 0;
421   - unsigned long extra, rlim;
  420 + unsigned long len, memp = 0;
  421 + unsigned long memp_size, extra, rlim;
422 422 unsigned long *reloc = 0, *rp;
423 423 struct inode *inode;
424 424 int i, rev, relocs = 0;
425 425  
... ... @@ -543,17 +543,10 @@
543 543 }
544 544  
545 545 len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
  546 + len = PAGE_ALIGN(len);
546 547 down_write(&current->mm->mmap_sem);
547 548 realdatastart = do_mmap(0, 0, len,
548 549 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
549   - /* Remap to use all availabe slack region space */
550   - if (realdatastart && (realdatastart < (unsigned long)-4096)) {
551   - reallen = kobjsize((void *)realdatastart);
552   - if (reallen > len) {
553   - realdatastart = do_mremap(realdatastart, len,
554   - reallen, MREMAP_FIXED, realdatastart);
555   - }
556   - }
557 550 up_write(&current->mm->mmap_sem);
558 551  
559 552 if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
560 553  
561 554  
... ... @@ -591,21 +584,14 @@
591 584  
592 585 reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
593 586 memp = realdatastart;
594   -
  587 + memp_size = len;
595 588 } else {
596 589  
597 590 len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
  591 + len = PAGE_ALIGN(len);
598 592 down_write(&current->mm->mmap_sem);
599 593 textpos = do_mmap(0, 0, len,
600 594 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
601   - /* Remap to use all availabe slack region space */
602   - if (textpos && (textpos < (unsigned long) -4096)) {
603   - reallen = kobjsize((void *)textpos);
604   - if (reallen > len) {
605   - textpos = do_mremap(textpos, len, reallen,
606   - MREMAP_FIXED, textpos);
607   - }
608   - }
609 595 up_write(&current->mm->mmap_sem);
610 596  
611 597 if (!textpos || textpos >= (unsigned long) -4096) {
... ... @@ -622,7 +608,7 @@
622 608 reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) +
623 609 MAX_SHARED_LIBS * sizeof(unsigned long));
624 610 memp = textpos;
625   -
  611 + memp_size = len;
626 612 #ifdef CONFIG_BINFMT_ZFLAT
627 613 /*
628 614 * load it all in and treat it like a RAM load from now on
629 615  
... ... @@ -680,10 +666,12 @@
680 666 * set up the brk stuff, uses any slack left in data/bss/stack
681 667 * allocation. We put the brk after the bss (between the bss
682 668 * and stack) like other platforms.
  669 + * Userspace code relies on the stack pointer starting out at
  670 + * an address right at the end of a page.
683 671 */
684 672 current->mm->start_brk = datapos + data_len + bss_len;
685 673 current->mm->brk = (current->mm->start_brk + 3) & ~3;
686   - current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len;
  674 + current->mm->context.end_brk = memp + memp_size - stack_len;
687 675 }
688 676  
689 677 if (flags & FLAT_FLAG_KTRACE)
... ... @@ -790,8 +778,8 @@
790 778  
791 779 /* zero the BSS, BRK and stack areas */
792 780 memset((void*)(datapos + data_len), 0, bss_len +
793   - (memp + kobjsize((void *) memp) - stack_len - /* end brk */
794   - libinfo->lib_list[id].start_brk) + /* start brk */
  781 + (memp + memp_size - stack_len - /* end brk */
  782 + libinfo->lib_list[id].start_brk) + /* start brk */
795 783 stack_len);
796 784  
797 785 return 0;
... ... @@ -41,8 +41,6 @@
41 41 (vmi)->used = 0; \
42 42 (vmi)->largest_chunk = 0; \
43 43 } while(0)
44   -
45   -extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46 44 #endif
47 45  
48 46 extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
... ... @@ -74,6 +74,9 @@
74 74 "LowTotal: %8lu kB\n"
75 75 "LowFree: %8lu kB\n"
76 76 #endif
  77 +#ifndef CONFIG_MMU
  78 + "MmapCopy: %8lu kB\n"
  79 +#endif
77 80 "SwapTotal: %8lu kB\n"
78 81 "SwapFree: %8lu kB\n"
79 82 "Dirty: %8lu kB\n"
... ... @@ -115,6 +118,9 @@
115 118 K(i.freehigh),
116 119 K(i.totalram-i.totalhigh),
117 120 K(i.freeram-i.freehigh),
  121 +#endif
  122 +#ifndef CONFIG_MMU
  123 + K((unsigned long) atomic_read(&mmap_pages_allocated)),
118 124 #endif
119 125 K(i.totalswap),
120 126 K(i.freeswap),
... ... @@ -33,33 +33,33 @@
33 33 #include "internal.h"
34 34  
35 35 /*
36   - * display a single VMA to a sequenced file
  36 + * display a single region to a sequenced file
37 37 */
38   -int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
  38 +static int nommu_region_show(struct seq_file *m, struct vm_region *region)
39 39 {
40 40 unsigned long ino = 0;
41 41 struct file *file;
42 42 dev_t dev = 0;
43 43 int flags, len;
44 44  
45   - flags = vma->vm_flags;
46   - file = vma->vm_file;
  45 + flags = region->vm_flags;
  46 + file = region->vm_file;
47 47  
48 48 if (file) {
49   - struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
  49 + struct inode *inode = region->vm_file->f_path.dentry->d_inode;
50 50 dev = inode->i_sb->s_dev;
51 51 ino = inode->i_ino;
52 52 }
53 53  
54 54 seq_printf(m,
55 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56   - vma->vm_start,
57   - vma->vm_end,
  56 + region->vm_start,
  57 + region->vm_end,
58 58 flags & VM_READ ? 'r' : '-',
59 59 flags & VM_WRITE ? 'w' : '-',
60 60 flags & VM_EXEC ? 'x' : '-',
61 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62   - ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
  62 + ((loff_t)region->vm_pgoff) << PAGE_SHIFT,
63 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64  
65 65 if (file) {
... ... @@ -75,61 +75,54 @@
75 75 }
76 76  
77 77 /*
78   - * display a list of all the VMAs the kernel knows about
  78 + * display a list of all the REGIONs the kernel knows about
79 79 * - nommu kernals have a single flat list
80 80 */
81   -static int nommu_vma_list_show(struct seq_file *m, void *v)
  81 +static int nommu_region_list_show(struct seq_file *m, void *_p)
82 82 {
83   - struct vm_area_struct *vma;
  83 + struct rb_node *p = _p;
84 84  
85   - vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
86   - return nommu_vma_show(m, vma);
  85 + return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
87 86 }
88 87  
89   -static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
  88 +static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
90 89 {
91   - struct rb_node *_rb;
  90 + struct rb_node *p;
92 91 loff_t pos = *_pos;
93   - void *next = NULL;
94 92  
95   - down_read(&nommu_vma_sem);
  93 + down_read(&nommu_region_sem);
96 94  
97   - for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) {
98   - if (pos == 0) {
99   - next = _rb;
100   - break;
101   - }
102   - pos--;
103   - }
104   -
105   - return next;
  95 + for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
  96 + if (pos-- == 0)
  97 + return p;
  98 + return NULL;
106 99 }
107 100  
108   -static void nommu_vma_list_stop(struct seq_file *m, void *v)
  101 +static void nommu_region_list_stop(struct seq_file *m, void *v)
109 102 {
110   - up_read(&nommu_vma_sem);
  103 + up_read(&nommu_region_sem);
111 104 }
112 105  
113   -static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
  106 +static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
114 107 {
115 108 (*pos)++;
116 109 return rb_next((struct rb_node *) v);
117 110 }
118 111  
119   -static const struct seq_operations proc_nommu_vma_list_seqop = {
120   - .start = nommu_vma_list_start,
121   - .next = nommu_vma_list_next,
122   - .stop = nommu_vma_list_stop,
123   - .show = nommu_vma_list_show
  112 +static struct seq_operations proc_nommu_region_list_seqop = {
  113 + .start = nommu_region_list_start,
  114 + .next = nommu_region_list_next,
  115 + .stop = nommu_region_list_stop,
  116 + .show = nommu_region_list_show
124 117 };
125 118  
126   -static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
  119 +static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
127 120 {
128   - return seq_open(file, &proc_nommu_vma_list_seqop);
  121 + return seq_open(file, &proc_nommu_region_list_seqop);
129 122 }
130 123  
131   -static const struct file_operations proc_nommu_vma_list_operations = {
132   - .open = proc_nommu_vma_list_open,
  124 +static const struct file_operations proc_nommu_region_list_operations = {
  125 + .open = proc_nommu_region_list_open,
133 126 .read = seq_read,
134 127 .llseek = seq_lseek,
135 128 .release = seq_release,
... ... @@ -137,7 +130,7 @@
137 130  
138 131 static int __init proc_nommu_init(void)
139 132 {
140   - proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
  133 + proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
141 134 return 0;
142 135 }
143 136  
fs/proc/task_nommu.c
... ... @@ -15,25 +15,32 @@
15 15 */
16 16 void task_mem(struct seq_file *m, struct mm_struct *mm)
17 17 {
18   - struct vm_list_struct *vml;
19   - unsigned long bytes = 0, sbytes = 0, slack = 0;
  18 + struct vm_area_struct *vma;
  19 + struct vm_region *region;
  20 + struct rb_node *p;
  21 + unsigned long bytes = 0, sbytes = 0, slack = 0, size;
20 22  
21 23 down_read(&mm->mmap_sem);
22   - for (vml = mm->context.vmlist; vml; vml = vml->next) {
23   - if (!vml->vma)
24   - continue;
  24 + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
  25 + vma = rb_entry(p, struct vm_area_struct, vm_rb);
25 26  
26   - bytes += kobjsize(vml);
  27 + bytes += kobjsize(vma);
  28 +
  29 + region = vma->vm_region;
  30 + if (region) {
  31 + size = kobjsize(region);
  32 + size += region->vm_end - region->vm_start;
  33 + } else {
  34 + size = vma->vm_end - vma->vm_start;
  35 + }
  36 +
27 37 if (atomic_read(&mm->mm_count) > 1 ||
28   - atomic_read(&vml->vma->vm_usage) > 1
29   - ) {
30   - sbytes += kobjsize((void *) vml->vma->vm_start);
31   - sbytes += kobjsize(vml->vma);
  38 + vma->vm_flags & VM_MAYSHARE) {
  39 + sbytes += size;
32 40 } else {
33   - bytes += kobjsize((void *) vml->vma->vm_start);
34   - bytes += kobjsize(vml->vma);
35   - slack += kobjsize((void *) vml->vma->vm_start) -
36   - (vml->vma->vm_end - vml->vma->vm_start);
  41 + bytes += size;
  42 + if (region)
  43 + slack = region->vm_end - vma->vm_end;
37 44 }
38 45 }
39 46  
40 47  
... ... @@ -70,13 +77,14 @@
70 77  
71 78 unsigned long task_vsize(struct mm_struct *mm)
72 79 {
73   - struct vm_list_struct *tbp;
  80 + struct vm_area_struct *vma;
  81 + struct rb_node *p;
74 82 unsigned long vsize = 0;
75 83  
76 84 down_read(&mm->mmap_sem);
77   - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
78   - if (tbp->vma)
79   - vsize += kobjsize((void *) tbp->vma->vm_start);
  85 + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
  86 + vma = rb_entry(p, struct vm_area_struct, vm_rb);
  87 + vsize += vma->vm_end - vma->vm_start;
80 88 }
81 89 up_read(&mm->mmap_sem);
82 90 return vsize;
83 91  
... ... @@ -85,15 +93,19 @@
85 93 int task_statm(struct mm_struct *mm, int *shared, int *text,
86 94 int *data, int *resident)
87 95 {
88   - struct vm_list_struct *tbp;
  96 + struct vm_area_struct *vma;
  97 + struct vm_region *region;
  98 + struct rb_node *p;
89 99 int size = kobjsize(mm);
90 100  
91 101 down_read(&mm->mmap_sem);
92   - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
93   - size += kobjsize(tbp);
94   - if (tbp->vma) {
95   - size += kobjsize(tbp->vma);
96   - size += kobjsize((void *) tbp->vma->vm_start);
  102 + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
  103 + vma = rb_entry(p, struct vm_area_struct, vm_rb);
  104 + size += kobjsize(vma);
  105 + region = vma->vm_region;
  106 + if (region) {
  107 + size += kobjsize(region);
  108 + size += region->vm_end - region->vm_start;
97 109 }
98 110 }
... ... @@ -105,20 +117,62 @@
105 117 }
106 118  
107 119 /*
  120 + * display a single VMA to a sequenced file
  121 + */
  122 +static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
  123 +{
  124 + unsigned long ino = 0;
  125 + struct file *file;
  126 + dev_t dev = 0;
  127 + int flags, len;
  128 +
  129 + flags = vma->vm_flags;
  130 + file = vma->vm_file;
  131 +
  132 + if (file) {
  133 + struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
  134 + dev = inode->i_sb->s_dev;
  135 + ino = inode->i_ino;
  136 + }
  137 +
  138 + seq_printf(m,
  139 + "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
  140 + vma->vm_start,
  141 + vma->vm_end,
  142 + flags & VM_READ ? 'r' : '-',
  143 + flags & VM_WRITE ? 'w' : '-',
  144 + flags & VM_EXEC ? 'x' : '-',
  145 + flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
  146 + vma->vm_pgoff << PAGE_SHIFT,
  147 + MAJOR(dev), MINOR(dev), ino, &len);
  148 +
  149 + if (file) {
  150 + len = 25 + sizeof(void *) * 6 - len;
  151 + if (len < 1)
  152 + len = 1;
  153 + seq_printf(m, "%*c", len, ' ');
  154 + seq_path(m, &file->f_path, "");
  155 + }
  156 +
  157 + seq_putc(m, '\n');
  158 + return 0;
  159 +}
  160 +
  161 +/*
108 162 * display mapping lines for a particular process's /proc/pid/maps
109 163 */
110   -static int show_map(struct seq_file *m, void *_vml)
  164 +static int show_map(struct seq_file *m, void *_p)
111 165 {
112   - struct vm_list_struct *vml = _vml;
  166 + struct rb_node *p = _p;
113 167  
114   - return nommu_vma_show(m, vml->vma);
  168 + return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
115 169 }
116 170  
117 171 static void *m_start(struct seq_file *m, loff_t *pos)
118 172 {
119 173 struct proc_maps_private *priv = m->private;
120   - struct vm_list_struct *vml;
121 174 struct mm_struct *mm;
  175 + struct rb_node *p;
122 176 loff_t n = *pos;
123 177  
124 178 /* pin the task and mm whilst we play with them */
125 179  
... ... @@ -134,9 +188,9 @@
134 188 }
135 189  
136 190 /* start from the Nth VMA */
137   - for (vml = mm->context.vmlist; vml; vml = vml->next)
  191 + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
138 192 if (n-- == 0)
139   - return vml;
  193 + return p;
140 194 return NULL;
141 195 }
142 196  
143 197  
144 198  
... ... @@ -152,12 +206,12 @@
152 206 }
153 207 }
154 208  
155   -static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
  209 +static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
156 210 {
157   - struct vm_list_struct *vml = _vml;
  211 + struct rb_node *p = _p;
158 212  
159 213 (*pos)++;
160   - return vml ? vml->next : NULL;
  214 + return p ? rb_next(p) : NULL;
161 215 }
162 216  
163 217 static const struct seq_operations proc_pid_maps_ops = {
fs/ramfs/file-nommu.c
... ... @@ -262,11 +262,11 @@
262 262 ret = -ENOMEM;
263 263 pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
264 264 if (!pages)
265   - goto out;
  265 + goto out_free;
266 266  
267 267 nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
268 268 if (nr != lpages)
269   - goto out; /* leave if some pages were missing */
  269 + goto out_free_pages; /* leave if some pages were missing */
270 270  
271 271 /* check the pages for physical adjacency */
272 272 ptr = pages;
273 273  
... ... @@ -274,19 +274,18 @@
274 274 page++;
275 275 for (loop = lpages; loop > 1; loop--)
276 276 if (*ptr++ != page++)
277   - goto out;
  277 + goto out_free_pages;
278 278  
279 279 /* okay - all conditions fulfilled */
280 280 ret = (unsigned long) page_address(pages[0]);
281 281  
282   - out:
283   - if (pages) {
284   - ptr = pages;
285   - for (loop = lpages; loop > 0; loop--)
286   - put_page(*ptr++);
287   - kfree(pages);
288   - }
289   -
  282 +out_free_pages:
  283 + ptr = pages;
  284 + for (loop = nr; loop > 0; loop--)
  285 + put_page(*ptr++);
  286 +out_free:
  287 + kfree(pages);
  288 +out:
290 289 return ret;
291 290 }
292 291  
include/asm-frv/mmu.h
... ... @@ -22,7 +22,6 @@
22 22 unsigned long dtlb_ptd_mapping; /* [DAMR5] PTD mapping for dtlb cached PGE */
23 23  
24 24 #else
25   - struct vm_list_struct *vmlist;
26 25 unsigned long end_brk;
27 26  
28 27 #endif
include/asm-m32r/mmu.h
... ... @@ -4,7 +4,6 @@
4 4 #if !defined(CONFIG_MMU)
5 5  
6 6 typedef struct {
7   - struct vm_list_struct *vmlist;
8 7 unsigned long end_brk;
9 8 } mm_context_t;
10 9  
... ... @@ -56,19 +56,9 @@
56 56  
57 57 extern struct kmem_cache *vm_area_cachep;
58 58  
59   -/*
60   - * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
61   - * disabled, then there's a single shared list of VMAs maintained by the
62   - * system, and mm's subscribe to these individually
63   - */
64   -struct vm_list_struct {
65   - struct vm_list_struct *next;
66   - struct vm_area_struct *vma;
67   -};
68   -
69 59 #ifndef CONFIG_MMU
70   -extern struct rb_root nommu_vma_tree;
71   -extern struct rw_semaphore nommu_vma_sem;
  60 +extern struct rb_root nommu_region_tree;
  61 +extern struct rw_semaphore nommu_region_sem;
72 62  
73 63 extern unsigned int kobjsize(const void *objp);
74 64 #endif
... ... @@ -1061,6 +1051,7 @@
1061 1051 unsigned long, enum memmap_context);
1062 1052 extern void setup_per_zone_pages_min(void);
1063 1053 extern void mem_init(void);
  1054 +extern void __init mmap_init(void);
1064 1055 extern void show_mem(void);
1065 1056 extern void si_meminfo(struct sysinfo * val);
1066 1057 extern void si_meminfo_node(struct sysinfo *val, int nid);
... ... @@ -1071,6 +1062,9 @@
1071 1062 #else
1072 1063 static inline void setup_per_cpu_pageset(void) {}
1073 1064 #endif
  1065 +
  1066 +/* nommu.c */
  1067 +extern atomic_t mmap_pages_allocated;
1074 1068  
1075 1069 /* prio_tree.c */
1076 1070 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
include/linux/mm_types.h
... ... @@ -97,6 +97,23 @@
97 97 };
98 98  
99 99 /*
  100 + * A region containing a mapping of a non-memory backed file under NOMMU
  101 + * conditions. These are held in a global tree and are pinned by the VMAs that
  102 + * map parts of them.
  103 + */
  104 +struct vm_region {
  105 + struct rb_node vm_rb; /* link in global region tree */
  106 + unsigned long vm_flags; /* VMA vm_flags */
  107 + unsigned long vm_start; /* start address of region */
  108 + unsigned long vm_end; /* region initialised to here */
  109 + unsigned long vm_top; /* region allocated to here */
  110 + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
  111 + struct file *vm_file; /* the backing file or NULL */
  112 +
  113 + atomic_t vm_usage; /* region usage count */
  114 +};
  115 +
  116 +/*
100 117 * This struct defines a memory VMM memory area. There is one of these
101 118 * per VM-area/task. A VM area is any part of the process virtual memory
102 119 * space that has a special rule for the page-fault handlers (ie a shared
... ... @@ -152,7 +169,7 @@
152 169 unsigned long vm_truncate_count;/* truncate_count or restart_addr */
153 170  
154 171 #ifndef CONFIG_MMU
155   - atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */
  172 + struct vm_region *vm_region; /* NOMMU mapping region */
156 173 #endif
157 174 #ifdef CONFIG_NUMA
158 175 struct mempolicy *vm_policy; /* NUMA policy for the VMA */
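
[Editorial illustration, not part of this commit.] With the layout above, a
NOMMU VMA reaches its backing store through vm_region; vm_end records how far
the region has been initialised and vm_top how far it was actually allocated.
A hypothetical helper (NOMMU-only, since vm_region exists only when CONFIG_MMU
is unset, and assuming the caller holds mmap_sem) might read:

    #include <linux/mm_types.h>

    /* hypothetical helper: bytes of store actually allocated behind a VMA */
    static unsigned long nommu_vma_backing_size(struct vm_area_struct *vma)
    {
        struct vm_region *region = vma->vm_region;

        if (region)
            return region->vm_top - region->vm_start;
        return vma->vm_end - vma->vm_start;
    }

fs/proc/task_nommu.c in this series does similar per-region accounting, using
vm_end (the initialised extent) rather than vm_top.
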
... ... @@ -317,6 +317,7 @@
317 317 if (wfd >= 0) {
318 318 sys_fchown(wfd, uid, gid);
319 319 sys_fchmod(wfd, mode);
  320 + sys_ftruncate(wfd, body_len);
320 321 vcollected = kstrdup(collected, GFP_KERNEL);
321 322 state = CopyFile;
322 323 }
... ... @@ -990,6 +990,7 @@
990 990 */
991 991 vma = find_vma(mm, addr);
992 992  
  993 +#ifdef CONFIG_MMU
993 994 while (vma) {
994 995 next = vma->vm_next;
995 996  
... ... @@ -1033,6 +1034,17 @@
1033 1034 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1034 1035 vma = next;
1035 1036 }
  1037 +
  1038 +#else /* CONFIG_MMU */
  1039 + /* under NOMMU conditions, the exact address to be destroyed must be
  1040 + * given */
  1041 + retval = -EINVAL;
  1042 + if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
  1043 + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
  1044 + retval = 0;
  1045 + }
  1046 +
  1047 +#endif
1036 1048  
1037 1049 up_write(&mm->mmap_sem);
1038 1050 return retval;
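
[Editorial illustration, not part of this commit.] The NOMMU branch above means
shmdt() must be handed exactly the address that shmat() returned; partial or
offset detaches cannot be honoured without an MMU. A small userspace sketch
using the standard SysV shared memory calls:

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
        void *addr;

        if (id < 0) {
            perror("shmget");
            return 1;
        }
        addr = shmat(id, NULL, 0);
        if (addr == (void *) -1) {
            perror("shmat");
            return 1;
        }
        /* detach with the exact attach address, as the NOMMU path requires */
        if (shmdt(addr) < 0)
            perror("shmdt");
        shmctl(id, IPC_RMID, NULL);
        return 0;
    }
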
... ... @@ -1481,12 +1481,10 @@
1481 1481 fs_cachep = kmem_cache_create("fs_cache",
1482 1482 sizeof(struct fs_struct), 0,
1483 1483 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1484   - vm_area_cachep = kmem_cache_create("vm_area_struct",
1485   - sizeof(struct vm_area_struct), 0,
1486   - SLAB_PANIC, NULL);
1487 1484 mm_cachep = kmem_cache_create("mm_struct",
1488 1485 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1489 1486 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
  1487 + mmap_init();
1490 1488 }
1491 1489  
1492 1490 /*
... ... @@ -82,6 +82,9 @@
82 82 extern int compat_log;
83 83 extern int latencytop_enabled;
84 84 extern int sysctl_nr_open_min, sysctl_nr_open_max;
  85 +#ifndef CONFIG_MMU
  86 +extern int sysctl_nr_trim_pages;
  87 +#endif
85 88 #ifdef CONFIG_RCU_TORTURE_TEST
86 89 extern int rcutorture_runnable;
87 90 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
... ... @@ -1101,6 +1104,17 @@
1101 1104 .maxlen = sizeof(sysctl_max_map_count),
1102 1105 .mode = 0644,
1103 1106 .proc_handler = &proc_dointvec
  1107 + },
  1108 +#else
  1109 + {
  1110 + .ctl_name = CTL_UNNUMBERED,
  1111 + .procname = "nr_trim_pages",
  1112 + .data = &sysctl_nr_trim_pages,
  1113 + .maxlen = sizeof(sysctl_nr_trim_pages),
  1114 + .mode = 0644,
  1115 + .proc_handler = &proc_dointvec_minmax,
  1116 + .strategy = &sysctl_intvec,
  1117 + .extra1 = &zero,
1104 1118 },
1105 1119 #endif
1106 1120 {
... ... @@ -512,6 +512,13 @@
512 512  
513 513 If unsure, say N.
514 514  
  515 +config DEBUG_NOMMU_REGIONS
  516 + bool "Debug the global anon/private NOMMU mapping region tree"
  517 + depends on DEBUG_KERNEL && !MMU
  518 + help
  519 + This option causes the global tree of anonymous and private mapping
  520 + regions to be regularly checked for invalid topology.
  521 +
515 522 config DEBUG_WRITECOUNT
516 523 bool "Debug filesystem writers count"
517 524 depends on DEBUG_KERNEL
... ... @@ -2472,4 +2472,14 @@
2472 2472  
2473 2473 mutex_unlock(&mm_all_locks_mutex);
2474 2474 }
  2475 +
  2476 +/*
  2477 + * initialise the VMA slab
  2478 + */
  2479 +void __init mmap_init(void)
  2480 +{
  2481 + vm_area_cachep = kmem_cache_create("vm_area_struct",
  2482 + sizeof(struct vm_area_struct), 0,
  2483 + SLAB_PANIC, NULL);
  2484 +}
mm/nommu.c
... ... @@ -6,11 +6,11 @@
6 6 *
7 7 * See Documentation/nommu-mmap.txt
8 8 *
9   - * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
  9 + * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
10 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
13   - * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org>
  13 + * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
14 14 */
15 15  
16 16 #include <linux/module.h>
... ... @@ -33,26 +33,51 @@
33 33 #include <asm/uaccess.h>
34 34 #include <asm/tlb.h>
35 35 #include <asm/tlbflush.h>
  36 +#include "internal.h"
36 37  
  38 +static inline __attribute__((format(printf, 1, 2)))
  39 +void no_printk(const char *fmt, ...)
  40 +{
  41 +}
  42 +
  43 +#if 0
  44 +#define kenter(FMT, ...) \
  45 + printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
  46 +#define kleave(FMT, ...) \
  47 + printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
  48 +#define kdebug(FMT, ...) \
  49 + printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
  50 +#else
  51 +#define kenter(FMT, ...) \
  52 + no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
  53 +#define kleave(FMT, ...) \
  54 + no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
  55 +#define kdebug(FMT, ...) \
  56 + no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
  57 +#endif
  58 +
37 59 #include "internal.h"
38 60  
39 61 void *high_memory;
40 62 struct page *mem_map;
41 63 unsigned long max_mapnr;
42 64 unsigned long num_physpages;
43   -unsigned long askedalloc, realalloc;
44 65 atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
45 66 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
46 67 int sysctl_overcommit_ratio = 50; /* default is 50% */
47 68 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
  69 +int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
48 70 int heap_stack_gap = 0;
49 71  
  72 +atomic_t mmap_pages_allocated;
  73 +
50 74 EXPORT_SYMBOL(mem_map);
51 75 EXPORT_SYMBOL(num_physpages);
52 76  
53   -/* list of shareable VMAs */
54   -struct rb_root nommu_vma_tree = RB_ROOT;
55   -DECLARE_RWSEM(nommu_vma_sem);
  77 +/* list of mapped, potentially shareable regions */
  78 +static struct kmem_cache *vm_region_jar;
  79 +struct rb_root nommu_region_tree = RB_ROOT;
  80 +DECLARE_RWSEM(nommu_region_sem);
56 81  
57 82 struct vm_operations_struct generic_file_vm_ops = {
58 83 };
... ... @@ -124,6 +149,20 @@
124 149 return ksize(objp);
125 150  
126 151 /*
  152 + * If it's not a compound page, see if we have a matching VMA
  153 + * region. This test is intentionally done in reverse order,
  154 + * so if there's no VMA, we still fall through and hand back
  155 + * PAGE_SIZE for 0-order pages.
  156 + */
  157 + if (!PageCompound(page)) {
  158 + struct vm_area_struct *vma;
  159 +
  160 + vma = find_vma(current->mm, (unsigned long)objp);
  161 + if (vma)
  162 + return vma->vm_end - vma->vm_start;
  163 + }
  164 +
  165 + /*
127 166 * The ksize() function is only guaranteed to work for pointers
128 167 * returned by kmalloc(). So handle arbitrary pointers here.
129 168 */
130 169  
131 170  
132 171  
133 172  
134 173  
135 174  
136 175  
137 176  
138 177  
139 178  
140 179  
141 180  
142 181  
143 182  
144 183  
145 184  
146 185  
147 186  
148 187  
149 188  
150 189  
151 190  
152 191  
153 192  
154 193  
155 194  
156 195  
157 196  
158 197  
159 198  
160 199  
161 200  
162 201  
163 202  
164 203  
165 204  
... ... @@ -401,130 +440,179 @@
401 440 return mm->brk = brk;
402 441 }
403 442  
404   -#ifdef DEBUG
405   -static void show_process_blocks(void)
  443 +/*
  444 + * initialise the VMA and region record slabs
  445 + */
  446 +void __init mmap_init(void)
406 447 {
407   - struct vm_list_struct *vml;
408   -
409   - printk("Process blocks %d:", current->pid);
410   -
411   - for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
412   - printk(" %p: %p", vml, vml->vma);
413   - if (vml->vma)
414   - printk(" (%d @%lx #%d)",
415   - kobjsize((void *) vml->vma->vm_start),
416   - vml->vma->vm_start,
417   - atomic_read(&vml->vma->vm_usage));
418   - printk(vml->next ? " ->" : ".\n");
419   - }
  448 + vm_region_jar = kmem_cache_create("vm_region_jar",
  449 + sizeof(struct vm_region), 0,
  450 + SLAB_PANIC, NULL);
  451 + vm_area_cachep = kmem_cache_create("vm_area_struct",
  452 + sizeof(struct vm_area_struct), 0,
  453 + SLAB_PANIC, NULL);
420 454 }
421   -#endif /* DEBUG */
422 455  
423 456 /*
424   - * add a VMA into a process's mm_struct in the appropriate place in the list
425   - * - should be called with mm->mmap_sem held writelocked
  457 + * validate the region tree
  458 + * - the caller must hold the region lock
426 459 */
427   -static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
  460 +#ifdef CONFIG_DEBUG_NOMMU_REGIONS
  461 +static noinline void validate_nommu_regions(void)
428 462 {
429   - struct vm_list_struct **ppv;
  463 + struct vm_region *region, *last;
  464 + struct rb_node *p, *lastp;
430 465  
431   - for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
432   - if ((*ppv)->vma->vm_start > vml->vma->vm_start)
433   - break;
  466 + lastp = rb_first(&nommu_region_tree);
  467 + if (!lastp)
  468 + return;
434 469  
435   - vml->next = *ppv;
436   - *ppv = vml;
  470 + last = rb_entry(lastp, struct vm_region, vm_rb);
  471 + if (unlikely(last->vm_end <= last->vm_start))
  472 + BUG();
  473 + if (unlikely(last->vm_top < last->vm_end))
  474 + BUG();
  475 +
  476 + while ((p = rb_next(lastp))) {
  477 + region = rb_entry(p, struct vm_region, vm_rb);
  478 + last = rb_entry(lastp, struct vm_region, vm_rb);
  479 +
  480 + if (unlikely(region->vm_end <= region->vm_start))
  481 + BUG();
  482 + if (unlikely(region->vm_top < region->vm_end))
  483 + BUG();
  484 + if (unlikely(region->vm_start < last->vm_top))
  485 + BUG();
  486 +
  487 + lastp = p;
  488 + }
437 489 }
  490 +#else
  491 +#define validate_nommu_regions() do {} while(0)
  492 +#endif
438 493  
439 494 /*
440   - * look up the first VMA in which addr resides, NULL if none
441   - * - should be called with mm->mmap_sem at least held readlocked
  495 + * add a region into the global tree
442 496 */
443   -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
  497 +static void add_nommu_region(struct vm_region *region)
444 498 {
445   - struct vm_list_struct *loop, *vml;
  499 + struct vm_region *pregion;
  500 + struct rb_node **p, *parent;
446 501  
447   - /* search the vm_start ordered list */
448   - vml = NULL;
449   - for (loop = mm->context.vmlist; loop; loop = loop->next) {
450   - if (loop->vma->vm_start > addr)
451   - break;
452   - vml = loop;
  502 + validate_nommu_regions();
  503 +
  504 + BUG_ON(region->vm_start & ~PAGE_MASK);
  505 +
  506 + parent = NULL;
  507 + p = &nommu_region_tree.rb_node;
  508 + while (*p) {
  509 + parent = *p;
  510 + pregion = rb_entry(parent, struct vm_region, vm_rb);
  511 + if (region->vm_start < pregion->vm_start)
  512 + p = &(*p)->rb_left;
  513 + else if (region->vm_start > pregion->vm_start)
  514 + p = &(*p)->rb_right;
  515 + else if (pregion == region)
  516 + return;
  517 + else
  518 + BUG();
453 519 }
454 520  
455   - if (vml && vml->vma->vm_end > addr)
456   - return vml->vma;
  521 + rb_link_node(&region->vm_rb, parent, p);
  522 + rb_insert_color(&region->vm_rb, &nommu_region_tree);
457 523  
458   - return NULL;
  524 + validate_nommu_regions();
459 525 }
460   -EXPORT_SYMBOL(find_vma);
461 526  
462 527 /*
463   - * find a VMA
464   - * - we don't extend stack VMAs under NOMMU conditions
  528 + * delete a region from the global tree
465 529 */
466   -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
  530 +static void delete_nommu_region(struct vm_region *region)
467 531 {
468   - return find_vma(mm, addr);
469   -}
  532 + BUG_ON(!nommu_region_tree.rb_node);
470 533  
471   -int expand_stack(struct vm_area_struct *vma, unsigned long address)
472   -{
473   - return -ENOMEM;
  534 + validate_nommu_regions();
  535 + rb_erase(&region->vm_rb, &nommu_region_tree);
  536 + validate_nommu_regions();
474 537 }
475 538  
476 539 /*
477   - * look up the first VMA exactly that exactly matches addr
478   - * - should be called with mm->mmap_sem at least held readlocked
  540 + * free a contiguous series of pages
479 541 */
480   -static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
481   - unsigned long addr)
  542 +static void free_page_series(unsigned long from, unsigned long to)
482 543 {
483   - struct vm_list_struct *vml;
  544 + for (; from < to; from += PAGE_SIZE) {
  545 + struct page *page = virt_to_page(from);
484 546  
485   - /* search the vm_start ordered list */
486   - for (vml = mm->context.vmlist; vml; vml = vml->next) {
487   - if (vml->vma->vm_start == addr)
488   - return vml->vma;
489   - if (vml->vma->vm_start > addr)
490   - break;
  547 + kdebug("- free %lx", from);
  548 + atomic_dec(&mmap_pages_allocated);
  549 + if (page_count(page) != 1)
  550 + kdebug("free page %p [%d]", page, page_count(page));
  551 + put_page(page);
491 552 }
492   -
493   - return NULL;
494 553 }
495 554  
496 555 /*
497   - * find a VMA in the global tree
  556 + * release a reference to a region
  557 + * - the caller must hold the region semaphore, which this releases
  558 + * - the region may not have been added to the tree yet, in which case vm_top
  559 + * will equal vm_start
498 560 */
499   -static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
  561 +static void __put_nommu_region(struct vm_region *region)
  562 + __releases(nommu_region_sem)
500 563 {
501   - struct vm_area_struct *vma;
502   - struct rb_node *n = nommu_vma_tree.rb_node;
  564 + kenter("%p{%d}", region, atomic_read(&region->vm_usage));
503 565  
504   - while (n) {
505   - vma = rb_entry(n, struct vm_area_struct, vm_rb);
  566 + BUG_ON(!nommu_region_tree.rb_node);
506 567  
507   - if (start < vma->vm_start)
508   - n = n->rb_left;
509   - else if (start > vma->vm_start)
510   - n = n->rb_right;
511   - else
512   - return vma;
  568 + if (atomic_dec_and_test(&region->vm_usage)) {
  569 + if (region->vm_top > region->vm_start)
  570 + delete_nommu_region(region);
  571 + up_write(&nommu_region_sem);
  572 +
  573 + if (region->vm_file)
  574 + fput(region->vm_file);
  575 +
  576 + /* IO memory and memory shared directly out of the pagecache
  577 + * from ramfs/tmpfs mustn't be released here */
  578 + if (region->vm_flags & VM_MAPPED_COPY) {
  579 + kdebug("free series");
  580 + free_page_series(region->vm_start, region->vm_top);
  581 + }
  582 + kmem_cache_free(vm_region_jar, region);
  583 + } else {
  584 + up_write(&nommu_region_sem);
513 585 }
  586 +}
514 587  
515   - return NULL;
  588 +/*
  589 + * release a reference to a region
  590 + */
  591 +static void put_nommu_region(struct vm_region *region)
  592 +{
  593 + down_write(&nommu_region_sem);
  594 + __put_nommu_region(region);
516 595 }
517 596  
518 597 /*
519   - * add a VMA in the global tree
  598 + * add a VMA into a process's mm_struct in the appropriate place in the list
  599 + * and tree and add to the address space's page tree also if not an anonymous
  600 + * page
  601 + * - should be called with mm->mmap_sem held writelocked
520 602 */
521   -static void add_nommu_vma(struct vm_area_struct *vma)
  603 +static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
522 604 {
523   - struct vm_area_struct *pvma;
  605 + struct vm_area_struct *pvma, **pp;
524 606 struct address_space *mapping;
525   - struct rb_node **p = &nommu_vma_tree.rb_node;
526   - struct rb_node *parent = NULL;
  607 + struct rb_node **p, *parent;
527 608  
  609 + kenter(",%p", vma);
  610 +
  611 + BUG_ON(!vma->vm_region);
  612 +
  613 + mm->map_count++;
  614 + vma->vm_mm = mm;
  615 +
528 616 /* add the VMA to the mapping */
529 617 if (vma->vm_file) {
530 618 mapping = vma->vm_file->f_mapping;
531 619  
532 620  
533 621  
534 622  
535 623  
536 624  
537 625  
538 626  
539 627  
... ... @@ -534,43 +622,63 @@
534 622 flush_dcache_mmap_unlock(mapping);
535 623 }
536 624  
537   - /* add the VMA to the master list */
  625 + /* add the VMA to the tree */
  626 + parent = NULL;
  627 + p = &mm->mm_rb.rb_node;
538 628 while (*p) {
539 629 parent = *p;
540 630 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
541 631  
542   - if (vma->vm_start < pvma->vm_start) {
  632 + /* sort by: start addr, end addr, VMA struct addr in that order
  633 + * (the latter is necessary as we may get identical VMAs) */
  634 + if (vma->vm_start < pvma->vm_start)
543 635 p = &(*p)->rb_left;
544   - }
545   - else if (vma->vm_start > pvma->vm_start) {
  636 + else if (vma->vm_start > pvma->vm_start)
546 637 p = &(*p)->rb_right;
547   - }
548   - else {
549   - /* mappings are at the same address - this can only
550   - * happen for shared-mem chardevs and shared file
551   - * mappings backed by ramfs/tmpfs */
552   - BUG_ON(!(pvma->vm_flags & VM_SHARED));
553   -
554   - if (vma < pvma)
555   - p = &(*p)->rb_left;
556   - else if (vma > pvma)
557   - p = &(*p)->rb_right;
558   - else
559   - BUG();
560   - }
  638 + else if (vma->vm_end < pvma->vm_end)
  639 + p = &(*p)->rb_left;
  640 + else if (vma->vm_end > pvma->vm_end)
  641 + p = &(*p)->rb_right;
  642 + else if (vma < pvma)
  643 + p = &(*p)->rb_left;
  644 + else if (vma > pvma)
  645 + p = &(*p)->rb_right;
  646 + else
  647 + BUG();
561 648 }
562 649  
563 650 rb_link_node(&vma->vm_rb, parent, p);
564   - rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
  651 + rb_insert_color(&vma->vm_rb, &mm->mm_rb);
  652 +
  653 + /* add VMA to the VMA list also */
  654 + for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
  655 + if (pvma->vm_start > vma->vm_start)
  656 + break;
  657 + if (pvma->vm_start < vma->vm_start)
  658 + continue;
  659 + if (pvma->vm_end < vma->vm_end)
  660 + break;
  661 + }
  662 +
  663 + vma->vm_next = *pp;
  664 + *pp = vma;
565 665 }
566 666  
567 667 /*
568   - * delete a VMA from the global list
  668 + * delete a VMA from its owning mm_struct and address space
569 669 */
570   -static void delete_nommu_vma(struct vm_area_struct *vma)
  670 +static void delete_vma_from_mm(struct vm_area_struct *vma)
571 671 {
  672 + struct vm_area_struct **pp;
572 673 struct address_space *mapping;
  674 + struct mm_struct *mm = vma->vm_mm;
573 675  
  676 + kenter("%p", vma);
  677 +
  678 + mm->map_count--;
  679 + if (mm->mmap_cache == vma)
  680 + mm->mmap_cache = NULL;
  681 +
574 682 /* remove the VMA from the mapping */
575 683 if (vma->vm_file) {
576 684 mapping = vma->vm_file->f_mapping;
577 685  
... ... @@ -580,11 +688,118 @@
580 688 flush_dcache_mmap_unlock(mapping);
581 689 }
582 690  
583   - /* remove from the master list */
584   - rb_erase(&vma->vm_rb, &nommu_vma_tree);
  691 + /* remove from the MM's tree and list */
  692 + rb_erase(&vma->vm_rb, &mm->mm_rb);
  693 + for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
  694 + if (*pp == vma) {
  695 + *pp = vma->vm_next;
  696 + break;
  697 + }
  698 + }
  699 +
  700 + vma->vm_mm = NULL;
585 701 }
586 702  
587 703 /*
  704 + * destroy a VMA record
  705 + */
  706 +static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
  707 +{
  708 + kenter("%p", vma);
  709 + if (vma->vm_ops && vma->vm_ops->close)
  710 + vma->vm_ops->close(vma);
  711 + if (vma->vm_file) {
  712 + fput(vma->vm_file);
  713 + if (vma->vm_flags & VM_EXECUTABLE)
  714 + removed_exe_file_vma(mm);
  715 + }
  716 + put_nommu_region(vma->vm_region);
  717 + kmem_cache_free(vm_area_cachep, vma);
  718 +}
  719 +
  720 +/*
  721 + * look up the first VMA in which addr resides, NULL if none
  722 + * - should be called with mm->mmap_sem at least held readlocked
  723 + */
  724 +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
  725 +{
  726 + struct vm_area_struct *vma;
  727 + struct rb_node *n = mm->mm_rb.rb_node;
  728 +
  729 + /* check the cache first */
  730 + vma = mm->mmap_cache;
  731 + if (vma && vma->vm_start <= addr && vma->vm_end > addr)
  732 + return vma;
  733 +
  734 + /* trawl the tree (there may be multiple mappings in which addr
  735 + * resides) */
  736 + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
  737 + vma = rb_entry(n, struct vm_area_struct, vm_rb);
  738 + if (vma->vm_start > addr)
  739 + return NULL;
  740 + if (vma->vm_end > addr) {
  741 + mm->mmap_cache = vma;
  742 + return vma;
  743 + }
  744 + }
  745 +
  746 + return NULL;
  747 +}
  748 +EXPORT_SYMBOL(find_vma);
  749 +
  750 +/*
  751 + * find a VMA
  752 + * - we don't extend stack VMAs under NOMMU conditions
  753 + */
  754 +struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
  755 +{
  756 + return find_vma(mm, addr);
  757 +}
  758 +
  759 +/*
  760 + * expand a stack to a given address
  761 + * - not supported under NOMMU conditions
  762 + */
  763 +int expand_stack(struct vm_area_struct *vma, unsigned long address)
  764 +{
  765 + return -ENOMEM;
  766 +}
  767 +
  768 +/*
  769 + * look up the first VMA that exactly matches addr
  770 + * - should be called with mm->mmap_sem at least held readlocked
  771 + */
  772 +static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
  773 + unsigned long addr,
  774 + unsigned long len)
  775 +{
  776 + struct vm_area_struct *vma;
  777 + struct rb_node *n = mm->mm_rb.rb_node;
  778 + unsigned long end = addr + len;
  779 +
  780 + /* check the cache first */
  781 + vma = mm->mmap_cache;
  782 + if (vma && vma->vm_start == addr && vma->vm_end == end)
  783 + return vma;
  784 +
  785 + /* trawl the tree (there may be multiple mappings in which addr
  786 + * resides) */
  787 + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
  788 + vma = rb_entry(n, struct vm_area_struct, vm_rb);
  789 + if (vma->vm_start < addr)
  790 + continue;
  791 + if (vma->vm_start > addr)
  792 + return NULL;
  793 + if (vma->vm_end == end) {
  794 + mm->mmap_cache = vma;
  795 + return vma;
  796 + }
  797 + }
  798 +
  799 + return NULL;
  800 +}
  801 +
  802 +/*
588 803 * determine whether a mapping should be permitted and, if so, what sort of
589 804 * mapping we're capable of supporting
590 805 */
... ... @@ -596,7 +811,7 @@
596 811 unsigned long pgoff,
597 812 unsigned long *_capabilities)
598 813 {
599   - unsigned long capabilities;
  814 + unsigned long capabilities, rlen;
600 815 unsigned long reqprot = prot;
601 816 int ret;
602 817  
603 818  
... ... @@ -616,12 +831,12 @@
616 831 return -EINVAL;
617 832  
618 833 /* Careful about overflows.. */
619   - len = PAGE_ALIGN(len);
620   - if (!len || len > TASK_SIZE)
  834 + rlen = PAGE_ALIGN(len);
  835 + if (!rlen || rlen > TASK_SIZE)
621 836 return -ENOMEM;
622 837  
623 838 /* offset overflow? */
624   - if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
  839 + if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
625 840 return -EOVERFLOW;
626 841  
627 842 if (file) {
628 843  
629 844  
... ... @@ -795,13 +1010,18 @@
795 1010 }
796 1011  
797 1012 /*
798   - * set up a shared mapping on a file
  1013 + * set up a shared mapping on a file (the driver or filesystem provides and
  1014 + * pins the storage)
799 1015 */
800   -static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
  1016 +static int do_mmap_shared_file(struct vm_area_struct *vma)
801 1017 {
802 1018 int ret;
803 1019  
804 1020 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
  1021 + if (ret == 0) {
  1022 + vma->vm_region->vm_top = vma->vm_region->vm_end;
  1023 + return ret;
  1024 + }
805 1025 if (ret != -ENOSYS)
806 1026 return ret;
807 1027  
808 1028  
809 1029  
... ... @@ -815,10 +1035,14 @@
815 1035 /*
816 1036 * set up a private mapping or an anonymous shared mapping
817 1037 */
818   -static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
  1038 +static int do_mmap_private(struct vm_area_struct *vma,
  1039 + struct vm_region *region,
  1040 + unsigned long len)
819 1041 {
  1042 + struct page *pages;
  1043 + unsigned long total, point, n, rlen;
820 1044 void *base;
821   - int ret;
  1045 + int ret, order;
822 1046  
823 1047 /* invoke the file's mapping function so that it can keep track of
824 1048 * shared mappings on devices or memory
825 1049  
826 1050  
827 1051  
828 1052  
829 1053  
830 1054  
831 1055  
... ... @@ -826,35 +1050,64 @@
826 1050 */
827 1051 if (vma->vm_file) {
828 1052 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
829   - if (ret != -ENOSYS) {
  1053 + if (ret == 0) {
830 1054 /* shouldn't return success if we're not sharing */
831   - BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
832   - return ret; /* success or a real error */
  1055 + BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
  1056 + vma->vm_region->vm_top = vma->vm_region->vm_end;
  1057 + return ret;
833 1058 }
  1059 + if (ret != -ENOSYS)
  1060 + return ret;
834 1061  
835 1062 /* getting an ENOSYS error indicates that direct mmap isn't
836 1063 * possible (as opposed to tried but failed) so we'll try to
837 1064 * make a private copy of the data and map that instead */
838 1065 }
839 1066  
  1067 + rlen = PAGE_ALIGN(len);
  1068 +
840 1069 /* allocate some memory to hold the mapping
841 1070 * - note that this may not return a page-aligned address if the object
842 1071 * we're allocating is smaller than a page
843 1072 */
844   - base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
845   - if (!base)
  1073 + order = get_order(rlen);
  1074 + kdebug("alloc order %d for %lx", order, len);
  1075 +
  1076 + pages = alloc_pages(GFP_KERNEL, order);
  1077 + if (!pages)
846 1078 goto enomem;
847 1079  
848   - vma->vm_start = (unsigned long) base;
849   - vma->vm_end = vma->vm_start + len;
850   - vma->vm_flags |= VM_MAPPED_COPY;
  1080 + total = 1 << order;
  1081 + atomic_add(total, &mmap_pages_allocated);
851 1082  
852   -#ifdef WARN_ON_SLACK
853   - if (len + WARN_ON_SLACK <= kobjsize(result))
854   - printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
855   - len, current->pid, kobjsize(result) - len);
856   -#endif
  1083 + point = rlen >> PAGE_SHIFT;
857 1084  
  1085 + /* we allocated a power-of-2 sized page set, so we may want to trim off
  1086 + * the excess */
  1087 + if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
  1088 + while (total > point) {
  1089 + order = ilog2(total - point);
  1090 + n = 1 << order;
  1091 + kdebug("shave %lu/%lu @%lu", n, total - point, total);
  1092 + atomic_sub(n, &mmap_pages_allocated);
  1093 + total -= n;
  1094 + set_page_refcounted(pages + total);
  1095 + __free_pages(pages + total, order);
  1096 + }
  1097 + }
  1098 +
  1099 + for (point = 1; point < total; point++)
  1100 + set_page_refcounted(&pages[point]);
  1101 +
  1102 + base = page_address(pages);
  1103 + region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
  1104 + region->vm_start = (unsigned long) base;
  1105 + region->vm_end = region->vm_start + rlen;
  1106 + region->vm_top = region->vm_start + (total << PAGE_SHIFT);
  1107 +
  1108 + vma->vm_start = region->vm_start;
  1109 + vma->vm_end = region->vm_start + len;
  1110 +
858 1111 if (vma->vm_file) {
859 1112 /* read the contents of a file into the copy */
860 1113 mm_segment_t old_fs;
861 1114  
862 1115  
863 1116  
... ... @@ -865,26 +1118,28 @@
865 1118  
866 1119 old_fs = get_fs();
867 1120 set_fs(KERNEL_DS);
868   - ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
  1121 + ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
869 1122 set_fs(old_fs);
870 1123  
871 1124 if (ret < 0)
872 1125 goto error_free;
873 1126  
874 1127 /* clear the last little bit */
875   - if (ret < len)
876   - memset(base + ret, 0, len - ret);
  1128 + if (ret < rlen)
  1129 + memset(base + ret, 0, rlen - ret);
877 1130  
878 1131 } else {
879 1132 /* if it's an anonymous mapping, then just clear it */
880   - memset(base, 0, len);
  1133 + memset(base, 0, rlen);
881 1134 }
882 1135  
883 1136 return 0;
884 1137  
885 1138 error_free:
886   - kfree(base);
887   - vma->vm_start = 0;
  1139 + free_page_series(region->vm_start, region->vm_end);
  1140 + region->vm_start = vma->vm_start = 0;
  1141 + region->vm_end = vma->vm_end = 0;
  1142 + region->vm_top = 0;
888 1143 return ret;
889 1144  
890 1145 enomem:
891 1146  
892 1147  
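
As an aside on the allocation path above: do_mmap_private() now takes a power-of-2
block from the page allocator and, when sysctl_nr_trim_pages permits, hands the
excess pages straight back via __free_pages(). The sketch below is illustrative
only (not part of this patch); it models that arithmetic in userspace for a
hypothetical five-page request, assuming 4KiB pages and a trim threshold of 1:

        #include <stdio.h>

        #define PAGE_SIZE 4096UL                        /* assumed page size */

        int main(void)
        {
                unsigned long len = 5 * PAGE_SIZE;      /* hypothetical request */
                unsigned long nr_trim_pages = 1;        /* assumed trim threshold */
                unsigned long point = (len + PAGE_SIZE - 1) / PAGE_SIZE;
                unsigned long total = 1;

                /* round up to a power-of-2 page count, as get_order() does */
                while (total < point)
                        total <<= 1;

                printf("%lu pages wanted, %lu pages allocated\n", point, total);
                if (nr_trim_pages && total - point >= nr_trim_pages)
                        printf("%lu excess pages trimmed back\n", total - point);
                else
                        printf("%lu excess pages retained\n", total - point);
                return 0;
        }

For a five-page request this reports eight pages allocated and three trimmed;
setting vm.nr_trim_pages to 0, or to anything above 3 here, would instead leave
the excess attached to the region (tracked by vm_top beyond vm_end).
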
... ... @@ -904,13 +1159,14 @@
904 1159 unsigned long flags,
905 1160 unsigned long pgoff)
906 1161 {
907   - struct vm_list_struct *vml = NULL;
908   - struct vm_area_struct *vma = NULL;
  1162 + struct vm_area_struct *vma;
  1163 + struct vm_region *region;
909 1164 struct rb_node *rb;
910   - unsigned long capabilities, vm_flags;
911   - void *result;
  1165 + unsigned long capabilities, vm_flags, result;
912 1166 int ret;
913 1167  
  1168 + kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
  1169 +
914 1170 if (!(flags & MAP_FIXED))
915 1171 addr = round_hint_to_min(addr);
916 1172  
... ... @@ -918,73 +1174,120 @@
918 1174 * mapping */
919 1175 ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
920 1176 &capabilities);
921   - if (ret < 0)
  1177 + if (ret < 0) {
  1178 + kleave(" = %d [val]", ret);
922 1179 return ret;
  1180 + }
923 1181  
924 1182 /* we've determined that we can make the mapping, now translate what we
925 1183 * now know into VMA flags */
926 1184 vm_flags = determine_vm_flags(file, prot, flags, capabilities);
927 1185  
928   - /* we're going to need to record the mapping if it works */
929   - vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
930   - if (!vml)
931   - goto error_getting_vml;
  1186 + /* we're going to need to record the mapping */
  1187 + region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
  1188 + if (!region)
  1189 + goto error_getting_region;
932 1190  
933   - down_write(&nommu_vma_sem);
  1191 + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
  1192 + if (!vma)
  1193 + goto error_getting_vma;
934 1194  
935   - /* if we want to share, we need to check for VMAs created by other
  1195 + atomic_set(&region->vm_usage, 1);
  1196 + region->vm_flags = vm_flags;
  1197 + region->vm_pgoff = pgoff;
  1198 +
  1199 + INIT_LIST_HEAD(&vma->anon_vma_node);
  1200 + vma->vm_flags = vm_flags;
  1201 + vma->vm_pgoff = pgoff;
  1202 +
  1203 + if (file) {
  1204 + region->vm_file = file;
  1205 + get_file(file);
  1206 + vma->vm_file = file;
  1207 + get_file(file);
  1208 + if (vm_flags & VM_EXECUTABLE) {
  1209 + added_exe_file_vma(current->mm);
  1210 + vma->vm_mm = current->mm;
  1211 + }
  1212 + }
  1213 +
  1214 + down_write(&nommu_region_sem);
  1215 +
  1216 + /* if we want to share, we need to check for regions created by other
936 1217 * mmap() calls that overlap with our proposed mapping
937   - * - we can only share with an exact match on most regular files
  1218 + * - we can only share with a superset match on most regular files
938 1219 * - shared mappings on character devices and memory backed files are
939 1220 * permitted to overlap inexactly as far as we are concerned for in
940 1221 * these cases, sharing is handled in the driver or filesystem rather
941 1222 * than here
942 1223 */
943 1224 if (vm_flags & VM_MAYSHARE) {
944   - unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
945   - unsigned long vmpglen;
  1225 + struct vm_region *pregion;
  1226 + unsigned long pglen, rpglen, pgend, rpgend, start;
946 1227  
947   - /* suppress VMA sharing for shared regions */
948   - if (vm_flags & VM_SHARED &&
949   - capabilities & BDI_CAP_MAP_DIRECT)
950   - goto dont_share_VMAs;
  1228 + pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
  1229 + pgend = pgoff + pglen;
951 1230  
952   - for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
953   - vma = rb_entry(rb, struct vm_area_struct, vm_rb);
  1231 + for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
  1232 + pregion = rb_entry(rb, struct vm_region, vm_rb);
954 1233  
955   - if (!(vma->vm_flags & VM_MAYSHARE))
  1234 + if (!(pregion->vm_flags & VM_MAYSHARE))
956 1235 continue;
957 1236  
958 1237 /* search for overlapping mappings on the same file */
959   - if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
  1238 + if (pregion->vm_file->f_path.dentry->d_inode !=
  1239 + file->f_path.dentry->d_inode)
960 1240 continue;
961 1241  
962   - if (vma->vm_pgoff >= pgoff + pglen)
  1242 + if (pregion->vm_pgoff >= pgend)
963 1243 continue;
964 1244  
965   - vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
966   - vmpglen >>= PAGE_SHIFT;
967   - if (pgoff >= vma->vm_pgoff + vmpglen)
  1245 + rpglen = pregion->vm_end - pregion->vm_start;
  1246 + rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
  1247 + rpgend = pregion->vm_pgoff + rpglen;
  1248 + if (pgoff >= rpgend)
968 1249 continue;
969 1250  
970   - /* handle inexactly overlapping matches between mappings */
971   - if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
  1251 + /* handle inexactly overlapping matches between
  1252 + * mappings */
  1253 + if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
  1254 + !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
  1255 + /* new mapping is not a subset of the region */
972 1256 if (!(capabilities & BDI_CAP_MAP_DIRECT))
973 1257 goto sharing_violation;
974 1258 continue;
975 1259 }
976 1260  
977   - /* we've found a VMA we can share */
978   - atomic_inc(&vma->vm_usage);
  1261 + /* we've found a region we can share */
  1262 + atomic_inc(&pregion->vm_usage);
  1263 + vma->vm_region = pregion;
  1264 + start = pregion->vm_start;
  1265 + start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
  1266 + vma->vm_start = start;
  1267 + vma->vm_end = start + len;
979 1268  
980   - vml->vma = vma;
981   - result = (void *) vma->vm_start;
982   - goto shared;
  1269 + if (pregion->vm_flags & VM_MAPPED_COPY) {
  1270 + kdebug("share copy");
  1271 + vma->vm_flags |= VM_MAPPED_COPY;
  1272 + } else {
  1273 + kdebug("share mmap");
  1274 + ret = do_mmap_shared_file(vma);
  1275 + if (ret < 0) {
  1276 + vma->vm_region = NULL;
  1277 + vma->vm_start = 0;
  1278 + vma->vm_end = 0;
  1279 + atomic_dec(&pregion->vm_usage);
  1280 + pregion = NULL;
  1281 + goto error_just_free;
  1282 + }
  1283 + }
  1284 + fput(region->vm_file);
  1285 + kmem_cache_free(vm_region_jar, region);
  1286 + region = pregion;
  1287 + result = start;
  1288 + goto share;
983 1289 }
984 1290  
985   - dont_share_VMAs:
986   - vma = NULL;
987   -
988 1291 /* obtain the address at which to make a shared mapping
989 1292 * - this is the hook for quasi-memory character devices to
990 1293 * tell us the location of a shared mapping
... ... @@ -995,113 +1298,93 @@
995 1298 if (IS_ERR((void *) addr)) {
996 1299 ret = addr;
997 1300 if (ret != (unsigned long) -ENOSYS)
998   - goto error;
  1301 + goto error_just_free;
999 1302  
1000 1303 /* the driver refused to tell us where to site
1001 1304 * the mapping so we'll have to attempt to copy
1002 1305 * it */
1003 1306 ret = (unsigned long) -ENODEV;
1004 1307 if (!(capabilities & BDI_CAP_MAP_COPY))
1005   - goto error;
  1308 + goto error_just_free;
1006 1309  
1007 1310 capabilities &= ~BDI_CAP_MAP_DIRECT;
  1311 + } else {
  1312 + vma->vm_start = region->vm_start = addr;
  1313 + vma->vm_end = region->vm_end = addr + len;
1008 1314 }
1009 1315 }
1010 1316 }
1011 1317  
1012   - /* we're going to need a VMA struct as well */
1013   - vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
1014   - if (!vma)
1015   - goto error_getting_vma;
  1318 + vma->vm_region = region;
1016 1319  
1017   - INIT_LIST_HEAD(&vma->anon_vma_node);
1018   - atomic_set(&vma->vm_usage, 1);
1019   - if (file) {
1020   - get_file(file);
1021   - if (vm_flags & VM_EXECUTABLE) {
1022   - added_exe_file_vma(current->mm);
1023   - vma->vm_mm = current->mm;
1024   - }
1025   - }
1026   - vma->vm_file = file;
1027   - vma->vm_flags = vm_flags;
1028   - vma->vm_start = addr;
1029   - vma->vm_end = addr + len;
1030   - vma->vm_pgoff = pgoff;
1031   -
1032   - vml->vma = vma;
1033   -
1034 1320 /* set up the mapping */
1035 1321 if (file && vma->vm_flags & VM_SHARED)
1036   - ret = do_mmap_shared_file(vma, len);
  1322 + ret = do_mmap_shared_file(vma);
1037 1323 else
1038   - ret = do_mmap_private(vma, len);
  1324 + ret = do_mmap_private(vma, region, len);
1039 1325 if (ret < 0)
1040   - goto error;
  1326 + goto error_put_region;
1041 1327  
  1328 + add_nommu_region(region);
  1329 +
1042 1330 /* okay... we have a mapping; now we have to register it */
1043   - result = (void *) vma->vm_start;
  1331 + result = vma->vm_start;
1044 1332  
1045   - if (vma->vm_flags & VM_MAPPED_COPY) {
1046   - realalloc += kobjsize(result);
1047   - askedalloc += len;
1048   - }
1049   -
1050   - realalloc += kobjsize(vma);
1051   - askedalloc += sizeof(*vma);
1052   -
1053 1333 current->mm->total_vm += len >> PAGE_SHIFT;
1054 1334  
1055   - add_nommu_vma(vma);
  1335 +share:
  1336 + add_vma_to_mm(current->mm, vma);
1056 1337  
1057   - shared:
1058   - realalloc += kobjsize(vml);
1059   - askedalloc += sizeof(*vml);
  1338 + up_write(&nommu_region_sem);
1060 1339  
1061   - add_vma_to_mm(current->mm, vml);
1062   -
1063   - up_write(&nommu_vma_sem);
1064   -
1065 1340 if (prot & PROT_EXEC)
1066   - flush_icache_range((unsigned long) result,
1067   - (unsigned long) result + len);
  1341 + flush_icache_range(result, result + len);
1068 1342  
1069   -#ifdef DEBUG
1070   - printk("do_mmap:\n");
1071   - show_process_blocks();
1072   -#endif
  1343 + kleave(" = %lx", result);
  1344 + return result;
1073 1345  
1074   - return (unsigned long) result;
1075   -
1076   - error:
1077   - up_write(&nommu_vma_sem);
1078   - kfree(vml);
  1346 +error_put_region:
  1347 + __put_nommu_region(region);
1079 1348 if (vma) {
1080 1349 if (vma->vm_file) {
1081 1350 fput(vma->vm_file);
1082 1351 if (vma->vm_flags & VM_EXECUTABLE)
1083 1352 removed_exe_file_vma(vma->vm_mm);
1084 1353 }
1085   - kfree(vma);
  1354 + kmem_cache_free(vm_area_cachep, vma);
1086 1355 }
  1356 + kleave(" = %d [pr]", ret);
1087 1357 return ret;
1088 1358  
1089   - sharing_violation:
1090   - up_write(&nommu_vma_sem);
1091   - printk("Attempt to share mismatched mappings\n");
1092   - kfree(vml);
1093   - return -EINVAL;
  1359 +error_just_free:
  1360 + up_write(&nommu_region_sem);
  1361 +error:
  1362 + fput(region->vm_file);
  1363 + kmem_cache_free(vm_region_jar, region);
  1364 + fput(vma->vm_file);
  1365 + if (vma->vm_flags & VM_EXECUTABLE)
  1366 + removed_exe_file_vma(vma->vm_mm);
  1367 + kmem_cache_free(vm_area_cachep, vma);
  1368 + kleave(" = %d", ret);
  1369 + return ret;
1094 1370  
1095   - error_getting_vma:
1096   - up_write(&nommu_vma_sem);
1097   - kfree(vml);
1098   - printk("Allocation of vma for %lu byte allocation from process %d failed\n",
  1371 +sharing_violation:
  1372 + up_write(&nommu_region_sem);
  1373 + printk(KERN_WARNING "Attempt to share mismatched mappings\n");
  1374 + ret = -EINVAL;
  1375 + goto error;
  1376 +
  1377 +error_getting_vma:
  1378 + kmem_cache_free(vm_region_jar, region);
  1379 + printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
  1380 + " from process %d failed\n",
1099 1381 len, current->pid);
1100 1382 show_free_areas();
1101 1383 return -ENOMEM;
1102 1384  
1103   - error_getting_vml:
1104   - printk("Allocation of vml for %lu byte allocation from process %d failed\n",
  1385 +error_getting_region:
  1386 + printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
  1387 + " from process %d failed\n",
1105 1388 len, current->pid);
1106 1389 show_free_areas();
1107 1390 return -ENOMEM;
1108 1391  
... ... @@ -1109,85 +1392,183 @@
1109 1392 EXPORT_SYMBOL(do_mmap_pgoff);
1110 1393  
1111 1394 /*
1112   - * handle mapping disposal for uClinux
  1395 + * split a vma into two pieces at address 'addr'; a new vma is allocated for
  1396 + * either the first part or the tail.
1113 1397 */
1114   -static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma)
  1398 +int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  1399 + unsigned long addr, int new_below)
1115 1400 {
1116   - if (vma) {
1117   - down_write(&nommu_vma_sem);
  1401 + struct vm_area_struct *new;
  1402 + struct vm_region *region;
  1403 + unsigned long npages;
1118 1404  
1119   - if (atomic_dec_and_test(&vma->vm_usage)) {
1120   - delete_nommu_vma(vma);
  1405 + kenter("");
1121 1406  
1122   - if (vma->vm_ops && vma->vm_ops->close)
1123   - vma->vm_ops->close(vma);
  1407 + /* we're only permitted to split anonymous regions that have a single
  1408 + * owner */
  1409 + if (vma->vm_file ||
  1410 + atomic_read(&vma->vm_region->vm_usage) != 1)
  1411 + return -ENOMEM;
1124 1412  
1125   - /* IO memory and memory shared directly out of the pagecache from
1126   - * ramfs/tmpfs mustn't be released here */
1127   - if (vma->vm_flags & VM_MAPPED_COPY) {
1128   - realalloc -= kobjsize((void *) vma->vm_start);
1129   - askedalloc -= vma->vm_end - vma->vm_start;
1130   - kfree((void *) vma->vm_start);
1131   - }
  1413 + if (mm->map_count >= sysctl_max_map_count)
  1414 + return -ENOMEM;
1132 1415  
1133   - realalloc -= kobjsize(vma);
1134   - askedalloc -= sizeof(*vma);
  1416 + region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
  1417 + if (!region)
  1418 + return -ENOMEM;
1135 1419  
1136   - if (vma->vm_file) {
1137   - fput(vma->vm_file);
1138   - if (vma->vm_flags & VM_EXECUTABLE)
1139   - removed_exe_file_vma(mm);
1140   - }
1141   - kfree(vma);
1142   - }
  1420 + new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
  1421 + if (!new) {
  1422 + kmem_cache_free(vm_region_jar, region);
  1423 + return -ENOMEM;
  1424 + }
1143 1425  
1144   - up_write(&nommu_vma_sem);
  1426 + /* most fields are the same, copy all, and then fixup */
  1427 + *new = *vma;
  1428 + *region = *vma->vm_region;
  1429 + new->vm_region = region;
  1430 +
  1431 + npages = (addr - vma->vm_start) >> PAGE_SHIFT;
  1432 +
  1433 + if (new_below) {
  1434 + region->vm_top = region->vm_end = new->vm_end = addr;
  1435 + } else {
  1436 + region->vm_start = new->vm_start = addr;
  1437 + region->vm_pgoff = new->vm_pgoff += npages;
1145 1438 }
  1439 +
  1440 + if (new->vm_ops && new->vm_ops->open)
  1441 + new->vm_ops->open(new);
  1442 +
  1443 + delete_vma_from_mm(vma);
  1444 + down_write(&nommu_region_sem);
  1445 + delete_nommu_region(vma->vm_region);
  1446 + if (new_below) {
  1447 + vma->vm_region->vm_start = vma->vm_start = addr;
  1448 + vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
  1449 + } else {
  1450 + vma->vm_region->vm_end = vma->vm_end = addr;
  1451 + vma->vm_region->vm_top = addr;
  1452 + }
  1453 + add_nommu_region(vma->vm_region);
  1454 + add_nommu_region(new->vm_region);
  1455 + up_write(&nommu_region_sem);
  1456 + add_vma_to_mm(mm, vma);
  1457 + add_vma_to_mm(mm, new);
  1458 + return 0;
1146 1459 }
1147 1460  
1148 1461 /*
1149   - * release a mapping
1150   - * - under NOMMU conditions the parameters must match exactly to the mapping to
1151   - * be removed
  1462 + * shrink a VMA by removing the specified chunk from either the beginning or
  1463 + * the end
1152 1464 */
1153   -int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
  1465 +static int shrink_vma(struct mm_struct *mm,
  1466 + struct vm_area_struct *vma,
  1467 + unsigned long from, unsigned long to)
1154 1468 {
1155   - struct vm_list_struct *vml, **parent;
1156   - unsigned long end = addr + len;
  1469 + struct vm_region *region;
1157 1470  
1158   -#ifdef DEBUG
1159   - printk("do_munmap:\n");
1160   -#endif
  1471 + kenter("");
1161 1472  
1162   - for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
1163   - if ((*parent)->vma->vm_start > addr)
1164   - break;
1165   - if ((*parent)->vma->vm_start == addr &&
1166   - ((len == 0) || ((*parent)->vma->vm_end == end)))
1167   - goto found;
  1473 + /* adjust the VMA's pointers, which may reposition it in the MM's tree
  1474 + * and list */
  1475 + delete_vma_from_mm(vma);
  1476 + if (from > vma->vm_start)
  1477 + vma->vm_end = from;
  1478 + else
  1479 + vma->vm_start = to;
  1480 + add_vma_to_mm(mm, vma);
  1481 +
  1482 + /* cut the backing region down to size */
  1483 + region = vma->vm_region;
  1484 + BUG_ON(atomic_read(&region->vm_usage) != 1);
  1485 +
  1486 + down_write(&nommu_region_sem);
  1487 + delete_nommu_region(region);
  1488 + if (from > region->vm_start) {
  1489 + to = region->vm_top;
  1490 + region->vm_top = region->vm_end = from;
  1491 + } else {
  1492 + region->vm_start = to;
1168 1493 }
  1494 + add_nommu_region(region);
  1495 + up_write(&nommu_region_sem);
1169 1496  
1170   - printk("munmap of non-mmaped memory by process %d (%s): %p\n",
1171   - current->pid, current->comm, (void *) addr);
1172   - return -EINVAL;
  1497 + free_page_series(from, to);
  1498 + return 0;
  1499 +}
1173 1500  
1174   - found:
1175   - vml = *parent;
  1501 +/*
  1502 + * release a mapping
  1503 + * - under NOMMU conditions the chunk to be unmapped must be backed by a single
  1504 + * VMA, though it need not cover the whole VMA
  1505 + */
  1506 +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
  1507 +{
  1508 + struct vm_area_struct *vma;
  1509 + struct rb_node *rb;
  1510 + unsigned long end = start + len;
  1511 + int ret;
1176 1512  
1177   - put_vma(mm, vml->vma);
  1513 + kenter(",%lx,%zx", start, len);
1178 1514  
1179   - *parent = vml->next;
1180   - realalloc -= kobjsize(vml);
1181   - askedalloc -= sizeof(*vml);
1182   - kfree(vml);
  1515 + if (len == 0)
  1516 + return -EINVAL;
1183 1517  
1184   - update_hiwater_vm(mm);
1185   - mm->total_vm -= len >> PAGE_SHIFT;
  1518 + /* find the first potentially overlapping VMA */
  1519 + vma = find_vma(mm, start);
  1520 + if (!vma) {
  1521 + printk(KERN_WARNING
  1522 + "munmap of memory not mmapped by process %d (%s):"
  1523 + " 0x%lx-0x%lx\n",
  1524 + current->pid, current->comm, start, start + len - 1);
  1525 + return -EINVAL;
  1526 + }
1186 1527  
1187   -#ifdef DEBUG
1188   - show_process_blocks();
1189   -#endif
  1528 + /* we're allowed to split an anonymous VMA but not a file-backed one */
  1529 + if (vma->vm_file) {
  1530 + do {
  1531 + if (start > vma->vm_start) {
  1532 + kleave(" = -EINVAL [miss]");
  1533 + return -EINVAL;
  1534 + }
  1535 + if (end == vma->vm_end)
  1536 + goto erase_whole_vma;
  1537 + rb = rb_next(&vma->vm_rb);
  1538 + vma = rb_entry(rb, struct vm_area_struct, vm_rb);
  1539 + } while (rb);
  1540 + kleave(" = -EINVAL [split file]");
  1541 + return -EINVAL;
  1542 + } else {
  1543 + /* the chunk must be a subset of the VMA found */
  1544 + if (start == vma->vm_start && end == vma->vm_end)
  1545 + goto erase_whole_vma;
  1546 + if (start < vma->vm_start || end > vma->vm_end) {
  1547 + kleave(" = -EINVAL [superset]");
  1548 + return -EINVAL;
  1549 + }
  1550 + if (start & ~PAGE_MASK) {
  1551 + kleave(" = -EINVAL [unaligned start]");
  1552 + return -EINVAL;
  1553 + }
  1554 + if (end != vma->vm_end && end & ~PAGE_MASK) {
  1555 + kleave(" = -EINVAL [unaligned split]");
  1556 + return -EINVAL;
  1557 + }
  1558 + if (start != vma->vm_start && end != vma->vm_end) {
  1559 + ret = split_vma(mm, vma, start, 1);
  1560 + if (ret < 0) {
  1561 + kleave(" = %d [split]", ret);
  1562 + return ret;
  1563 + }
  1564 + }
  1565 + return shrink_vma(mm, vma, start, end);
  1566 + }
1190 1567  
  1568 +erase_whole_vma:
  1569 + delete_vma_from_mm(vma);
  1570 + delete_vma(mm, vma);
  1571 + kleave(" = 0");
1191 1572 return 0;
1192 1573 }
1193 1574 EXPORT_SYMBOL(do_munmap);
1194 1575  
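
The new do_munmap() above no longer demands an exact match for anonymous
mappings: any page-aligned chunk inside a single VMA may be released, splitting
and/or shrinking the VMA as required. A minimal userspace illustration (not part
of this patch) that would exercise the split_vma()/shrink_vma() path:

        #include <stdio.h>
        #include <sys/mman.h>
        #include <unistd.h>

        int main(void)
        {
                long page = sysconf(_SC_PAGESIZE);
                char *p = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (p == MAP_FAILED)
                        return 1;

                /* punch out the second page: the kernel splits the VMA and
                 * shrinks the remainder (illustrative, not from the patch) */
                if (munmap(p + page, page) == -1)
                        perror("munmap hole");

                /* release the two surviving pieces with exact matches */
                munmap(p, page);
                munmap(p + 2 * page, 2 * page);
                return 0;
        }

A file-backed VMA, by contrast, still has to be removed in its entirety, as the
code above refuses to split it.
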
... ... @@ -1204,32 +1585,26 @@
1204 1585 }
1205 1586  
1206 1587 /*
1207   - * Release all mappings
  1588 + * release all the mappings made in a process's VM space
1208 1589 */
1209   -void exit_mmap(struct mm_struct * mm)
  1590 +void exit_mmap(struct mm_struct *mm)
1210 1591 {
1211   - struct vm_list_struct *tmp;
  1592 + struct vm_area_struct *vma;
1212 1593  
1213   - if (mm) {
1214   -#ifdef DEBUG
1215   - printk("Exit_mmap:\n");
1216   -#endif
  1594 + if (!mm)
  1595 + return;
1217 1596  
1218   - mm->total_vm = 0;
  1597 + kenter("");
1219 1598  
1220   - while ((tmp = mm->context.vmlist)) {
1221   - mm->context.vmlist = tmp->next;
1222   - put_vma(mm, tmp->vma);
  1599 + mm->total_vm = 0;
1223 1600  
1224   - realalloc -= kobjsize(tmp);
1225   - askedalloc -= sizeof(*tmp);
1226   - kfree(tmp);
1227   - }
1228   -
1229   -#ifdef DEBUG
1230   - show_process_blocks();
1231   -#endif
  1601 + while ((vma = mm->mmap)) {
  1602 + mm->mmap = vma->vm_next;
  1603 + delete_vma_from_mm(vma);
  1604 + delete_vma(mm, vma);
1232 1605 }
  1606 +
  1607 + kleave("");
1233 1608 }
1234 1609  
1235 1610 unsigned long do_brk(unsigned long addr, unsigned long len)
... ... @@ -1242,8 +1617,8 @@
1242 1617 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
1243 1618 *
1244 1619 * under NOMMU conditions, we only permit changing a mapping's size, and only
1245   - * as long as it stays within the hole allocated by the kmalloc() call in
1246   - * do_mmap_pgoff() and the block is not shareable
  1620 + * as long as it stays within the region allocated by do_mmap_private() and the
  1621 + * block is not shareable
1247 1622 *
1248 1623 * MREMAP_FIXED is not supported under NOMMU conditions
1249 1624 */
1250 1625  
1251 1626  
... ... @@ -1254,13 +1629,16 @@
1254 1629 struct vm_area_struct *vma;
1255 1630  
1256 1631 /* insanity checks first */
1257   - if (new_len == 0)
  1632 + if (old_len == 0 || new_len == 0)
1258 1633 return (unsigned long) -EINVAL;
1259 1634  
  1635 + if (addr & ~PAGE_MASK)
  1636 + return -EINVAL;
  1637 +
1260 1638 if (flags & MREMAP_FIXED && new_addr != addr)
1261 1639 return (unsigned long) -EINVAL;
1262 1640  
1263   - vma = find_vma_exact(current->mm, addr);
  1641 + vma = find_vma_exact(current->mm, addr, old_len);
1264 1642 if (!vma)
1265 1643 return (unsigned long) -EINVAL;
1266 1644  
1267 1645  
1268 1646  
... ... @@ -1270,22 +1648,19 @@
1270 1648 if (vma->vm_flags & VM_MAYSHARE)
1271 1649 return (unsigned long) -EPERM;
1272 1650  
1273   - if (new_len > kobjsize((void *) addr))
  1651 + if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
1274 1652 return (unsigned long) -ENOMEM;
1275 1653  
1276 1654 /* all checks complete - do it */
1277 1655 vma->vm_end = vma->vm_start + new_len;
1278   -
1279   - askedalloc -= old_len;
1280   - askedalloc += new_len;
1281   -
1282 1656 return vma->vm_start;
1283 1657 }
1284 1658 EXPORT_SYMBOL(do_mremap);
1285 1659  
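
To round off the mremap() changes: under NOMMU a mapping can only be resized in
place, never moved, and never grown beyond the region originally allocated for
it. A small illustration (not part of this patch) of shrinking a private
anonymous mapping and growing it back within its original footprint:

        #define _GNU_SOURCE
        #include <stdio.h>
        #include <sys/mman.h>
        #include <unistd.h>

        int main(void)
        {
                long page = sysconf(_SC_PAGESIZE);
                void *p = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (p == MAP_FAILED)
                        return 1;

                /* shrink in place ... */
                if (mremap(p, 4 * page, 2 * page, 0) == MAP_FAILED)
                        perror("shrink");

                /* ... and grow back; this stays within the original backing
                 * region, so the new do_mremap() permits it */
                if (mremap(p, 2 * page, 4 * page, 0) == MAP_FAILED)
                        perror("grow");

                return munmap(p, 4 * page);
        }

Requests that would need the mapping to move, or that exceed the backing
region, are still refused.
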
1286   -asmlinkage unsigned long sys_mremap(unsigned long addr,
1287   - unsigned long old_len, unsigned long new_len,
1288   - unsigned long flags, unsigned long new_addr)
  1660 +asmlinkage
  1661 +unsigned long sys_mremap(unsigned long addr,
  1662 + unsigned long old_len, unsigned long new_len,
  1663 + unsigned long flags, unsigned long new_addr)
1289 1664 {
1290 1665 unsigned long ret;
1291 1666