Commit c40f6f8bbc4cbd2902671aacd587400ddca62627
Exists in master and in 4 other branches
Merge git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu
* git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu:
  NOMMU: Support XIP on initramfs
  NOMMU: Teach kobjsize() about VMA regions.
  FLAT: Don't attempt to expand the userspace stack to fill the space allocated
  FDPIC: Don't attempt to expand the userspace stack to fill the space allocated
  NOMMU: Improve procfs output using per-MM VMAs
  NOMMU: Make mmap allocation page trimming behaviour configurable.
  NOMMU: Make VMAs per MM as for MMU-mode linux
  NOMMU: Delete askedalloc and realalloc variables
  NOMMU: Rename ARM's struct vm_region
  NOMMU: Fix cleanup handling in ramfs_nommu_get_umapped_area()
Showing 29 changed files
- Documentation/nommu-mmap.txt
- Documentation/sysctl/vm.txt
- arch/arm/include/asm/mmu.h
- arch/arm/mm/dma-mapping.c
- arch/blackfin/include/asm/mmu.h
- arch/blackfin/kernel/ptrace.c
- arch/blackfin/kernel/traps.c
- arch/frv/kernel/ptrace.c
- arch/h8300/include/asm/mmu.h
- arch/m68knommu/include/asm/mmu.h
- arch/sh/include/asm/mmu.h
- fs/binfmt_elf_fdpic.c
- fs/binfmt_flat.c
- fs/proc/internal.h
- fs/proc/meminfo.c
- fs/proc/nommu.c
- fs/proc/task_nommu.c
- fs/ramfs/file-nommu.c
- include/asm-frv/mmu.h
- include/asm-m32r/mmu.h
- include/linux/mm.h
- include/linux/mm_types.h
- init/initramfs.c
- ipc/shm.c
- kernel/fork.c
- kernel/sysctl.c
- lib/Kconfig.debug
- mm/mmap.c
- mm/nommu.c
Documentation/nommu-mmap.txt
... | ... | @@ -109,13 +109,19 @@ |
109 | 109 | FURTHER NOTES ON NO-MMU MMAP |
110 | 110 | ============================ |
111 | 111 | |
112 | - (*) A request for a private mapping of less than a page in size may not return | |
113 | - a page-aligned buffer. This is because the kernel calls kmalloc() to | |
114 | - allocate the buffer, not get_free_page(). | |
112 | + (*) A request for a private mapping of a file may return a buffer that is not | |
113 | + page-aligned. This is because XIP may take place, and the data may not be | |
114 | + page aligned in the backing store. |
115 | 115 | |
116 | - (*) A list of all the mappings on the system is visible through /proc/maps in | |
117 | - no-MMU mode. | |
116 | + (*) A request for an anonymous mapping will always be page aligned. If |
117 | + possible, the size of the request should be a power of two; otherwise |
118 | + some of the space may be wasted, as the kernel must allocate a power-of-2 |
119 | + granule but will only discard the excess if appropriately configured, |
120 | + since trimming affects fragmentation. |
118 | 121 | |
122 | + (*) A list of all the private copy and anonymous mappings on the system is | |
123 | + visible through /proc/maps in no-MMU mode. | |
124 | + | |
119 | 125 | (*) A list of all the mappings in use by a process is visible through |
120 | 126 | /proc/<pid>/maps in no-MMU mode. |
121 | 127 | |
... | ... | @@ -242,4 +248,19 @@ |
242 | 248 | Provision of shared mappings on block device files is exactly the same as for |
243 | 249 | character devices. If there isn't a real device underneath, then the driver |
244 | 250 | should allocate sufficient contiguous memory to honour any supported mapping. |
251 | + | |
252 | + | |
253 | +================================= | |
254 | +ADJUSTING PAGE TRIMMING BEHAVIOUR | |
255 | +================================= | |
256 | + | |
257 | +NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages | |
258 | +when performing an allocation. This can have adverse effects on memory | |
259 | +fragmentation, and as such, is left configurable. The default behaviour is to | |
260 | +aggressively trim allocations and discard any excess pages back into the page | 
261 | +allocator. In order to retain finer-grained control over fragmentation, this | |
262 | +behaviour can either be disabled completely, or bumped up to a higher page | |
263 | +watermark where trimming begins. | |
264 | + | |
265 | +Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'. |
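As a rough sketch of the granule arithmetic described above (illustrative user-space C, not code from this commit; nommu_granule() is a hypothetical helper): the allocator must hand back a power-of-2 number of pages, so a 5-page request occupies an 8-page granule unless the excess is trimmed.

    #include <stdio.h>

    /* hypothetical helper mirroring the power-of-2 granule behaviour
     * described in Documentation/nommu-mmap.txt above */
    static unsigned long nommu_granule(unsigned long len, unsigned long page_size)
    {
            unsigned long pages = (len + page_size - 1) / page_size;
            unsigned long granule_pages = 1;

            while (granule_pages < pages)
                    granule_pages <<= 1;    /* next power of two >= pages */
            return granule_pages * page_size;
    }

    int main(void)
    {
            unsigned long page = 4096;
            unsigned long req = 5 * page;   /* 5-page request */
            unsigned long alloc = nommu_granule(req, page);

            /* with vm.nr_trim_pages == 1 the three excess pages go back to
             * the page allocator; with 0 the whole granule stays allocated */
            printf("requested %lu, granule %lu, excess %lu\n",
                   req, alloc, alloc - req);
            return 0;
    }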
Documentation/sysctl/vm.txt
... | ... | @@ -38,6 +38,7 @@ |
38 | 38 | - numa_zonelist_order |
39 | 39 | - nr_hugepages |
40 | 40 | - nr_overcommit_hugepages |
41 | +- nr_trim_pages (only if CONFIG_MMU=n) | |
41 | 42 | |
42 | 43 | ============================================================== |
43 | 44 | |
... | ... | @@ -348,4 +349,21 @@ |
348 | 349 | nr_hugepages + nr_overcommit_hugepages. |
349 | 350 | |
350 | 351 | See Documentation/vm/hugetlbpage.txt |
352 | + | |
353 | +============================================================== | |
354 | + | |
355 | +nr_trim_pages | |
356 | + | |
357 | +This is available only on NOMMU kernels. | |
358 | + | |
359 | +This value adjusts the excess page trimming behaviour of power-of-2 aligned | |
360 | +NOMMU mmap allocations. | |
361 | + | |
362 | +A value of 0 disables trimming of allocations entirely, while a value of 1 | |
363 | +trims excess pages aggressively. Any value >= 1 acts as the watermark at | 
364 | +which trimming of allocations is initiated. | 
365 | + | |
366 | +The default value is 1. | |
367 | + | |
368 | +See Documentation/nommu-mmap.txt for more information. |
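A minimal sketch of adjusting the knob at run time (user-space C, not from this commit; it assumes the conventional procfs path /proc/sys/vm/nr_trim_pages implied by the sysctl name above):

    #include <stdio.h>

    /* write vm.nr_trim_pages; returns 0 on success, -1 on error */
    static int set_nr_trim_pages(int value)
    {
            FILE *f = fopen("/proc/sys/vm/nr_trim_pages", "w");

            if (!f)
                    return -1;
            fprintf(f, "%d\n", value);
            return fclose(f) == 0 ? 0 : -1;
    }

    int main(void)
    {
            /* 0 = never trim, 1 = trim aggressively (the default),
             * N > 1 = only start trimming once N excess pages accrue */
            return set_nr_trim_pages(1) ? 1 : 0;
    }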
arch/arm/include/asm/mmu.h
arch/arm/mm/dma-mapping.c
... | ... | @@ -71,7 +71,7 @@ |
71 | 71 | * the amount of RAM found at boot time.) I would imagine that get_vm_area() |
72 | 72 | * would have to initialise this each time prior to calling vm_region_alloc(). |
73 | 73 | */ |
74 | -struct vm_region { | |
74 | +struct arm_vm_region { | |
75 | 75 | struct list_head vm_list; |
76 | 76 | unsigned long vm_start; |
77 | 77 | unsigned long vm_end; |
78 | 78 | |
79 | 79 | |
80 | 80 | |
... | ... | @@ -79,20 +79,20 @@ |
79 | 79 | int vm_active; |
80 | 80 | }; |
81 | 81 | |
82 | -static struct vm_region consistent_head = { | |
82 | +static struct arm_vm_region consistent_head = { | |
83 | 83 | .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), |
84 | 84 | .vm_start = CONSISTENT_BASE, |
85 | 85 | .vm_end = CONSISTENT_END, |
86 | 86 | }; |
87 | 87 | |
88 | -static struct vm_region * | |
89 | -vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp) | |
88 | +static struct arm_vm_region * | |
89 | +arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) | |
90 | 90 | { |
91 | 91 | unsigned long addr = head->vm_start, end = head->vm_end - size; |
92 | 92 | unsigned long flags; |
93 | - struct vm_region *c, *new; | |
93 | + struct arm_vm_region *c, *new; | |
94 | 94 | |
95 | - new = kmalloc(sizeof(struct vm_region), gfp); | |
95 | + new = kmalloc(sizeof(struct arm_vm_region), gfp); | |
96 | 96 | if (!new) |
97 | 97 | goto out; |
98 | 98 | |
99 | 99 | |
... | ... | @@ -127,9 +127,9 @@ |
127 | 127 | return NULL; |
128 | 128 | } |
129 | 129 | |
130 | -static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr) | |
130 | +static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr) | |
131 | 131 | { |
132 | - struct vm_region *c; | |
132 | + struct arm_vm_region *c; | |
133 | 133 | |
134 | 134 | list_for_each_entry(c, &head->vm_list, vm_list) { |
135 | 135 | if (c->vm_active && c->vm_start == addr) |
... | ... | @@ -149,7 +149,7 @@ |
149 | 149 | pgprot_t prot) |
150 | 150 | { |
151 | 151 | struct page *page; |
152 | - struct vm_region *c; | |
152 | + struct arm_vm_region *c; | |
153 | 153 | unsigned long order; |
154 | 154 | u64 mask = ISA_DMA_THRESHOLD, limit; |
155 | 155 | |
... | ... | @@ -214,7 +214,7 @@ |
214 | 214 | /* |
215 | 215 | * Allocate a virtual address in the consistent mapping region. |
216 | 216 | */ |
217 | - c = vm_region_alloc(&consistent_head, size, | |
217 | + c = arm_vm_region_alloc(&consistent_head, size, | |
218 | 218 | gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); |
219 | 219 | if (c) { |
220 | 220 | pte_t *pte; |
221 | 221 | |
... | ... | @@ -311,13 +311,13 @@ |
311 | 311 | void *cpu_addr, dma_addr_t dma_addr, size_t size) |
312 | 312 | { |
313 | 313 | unsigned long flags, user_size, kern_size; |
314 | - struct vm_region *c; | |
314 | + struct arm_vm_region *c; | |
315 | 315 | int ret = -ENXIO; |
316 | 316 | |
317 | 317 | user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
318 | 318 | |
319 | 319 | spin_lock_irqsave(&consistent_lock, flags); |
320 | - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); | |
320 | + c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); | |
321 | 321 | spin_unlock_irqrestore(&consistent_lock, flags); |
322 | 322 | |
323 | 323 | if (c) { |
... | ... | @@ -359,7 +359,7 @@ |
359 | 359 | */ |
360 | 360 | void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) |
361 | 361 | { |
362 | - struct vm_region *c; | |
362 | + struct arm_vm_region *c; | |
363 | 363 | unsigned long flags, addr; |
364 | 364 | pte_t *ptep; |
365 | 365 | int idx; |
... | ... | @@ -378,7 +378,7 @@ |
378 | 378 | size = PAGE_ALIGN(size); |
379 | 379 | |
380 | 380 | spin_lock_irqsave(&consistent_lock, flags); |
381 | - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); | |
381 | + c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); | |
382 | 382 | if (!c) |
383 | 383 | goto no_area; |
384 | 384 |
arch/blackfin/include/asm/mmu.h
arch/blackfin/kernel/ptrace.c
... | ... | @@ -160,15 +160,15 @@ |
160 | 160 | static inline int is_user_addr_valid(struct task_struct *child, |
161 | 161 | unsigned long start, unsigned long len) |
162 | 162 | { |
163 | - struct vm_list_struct *vml; | |
163 | + struct vm_area_struct *vma; | |
164 | 164 | struct sram_list_struct *sraml; |
165 | 165 | |
166 | 166 | /* overflow */ |
167 | 167 | if (start + len < start) |
168 | 168 | return -EIO; |
169 | 169 | |
170 | - for (vml = child->mm->context.vmlist; vml; vml = vml->next) | |
171 | - if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end) | |
170 | + vma = find_vma(child->mm, start); | |
171 | + if (vma && start >= vma->vm_start && start + len <= vma->vm_end) | |
172 | 172 | return 0; |
173 | 173 | |
174 | 174 | for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next) |
arch/blackfin/kernel/traps.c
... | ... | @@ -32,6 +32,7 @@ |
32 | 32 | #include <linux/module.h> |
33 | 33 | #include <linux/kallsyms.h> |
34 | 34 | #include <linux/fs.h> |
35 | +#include <linux/rbtree.h> | |
35 | 36 | #include <asm/traps.h> |
36 | 37 | #include <asm/cacheflush.h> |
37 | 38 | #include <asm/cplb.h> |
... | ... | @@ -83,6 +84,7 @@ |
83 | 84 | struct mm_struct *mm; |
84 | 85 | unsigned long flags, offset; |
85 | 86 | unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); |
87 | + struct rb_node *n; | |
86 | 88 | |
87 | 89 | #ifdef CONFIG_KALLSYMS |
88 | 90 | unsigned long symsize; |
89 | 91 | |
... | ... | @@ -128,10 +130,11 @@ |
128 | 130 | if (!mm) |
129 | 131 | continue; |
130 | 132 | |
131 | - vml = mm->context.vmlist; | |
132 | - while (vml) { | |
133 | - struct vm_area_struct *vma = vml->vma; | |
133 | + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { | |
134 | + struct vm_area_struct *vma; | |
134 | 135 | |
136 | + vma = rb_entry(n, struct vm_area_struct, vm_rb); | |
137 | + | |
135 | 138 | if (address >= vma->vm_start && address < vma->vm_end) { |
136 | 139 | char _tmpbuf[256]; |
137 | 140 | char *name = p->comm; |
... | ... | @@ -176,8 +179,6 @@ |
176 | 179 | |
177 | 180 | goto done; |
178 | 181 | } |
179 | - | |
180 | - vml = vml->next; | |
181 | 182 | } |
182 | 183 | if (!in_atomic) |
183 | 184 | mmput(mm); |
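The conversion above is one instance of a pattern repeated throughout this merge: the old per-context vmlist walk is replaced by an rb-tree walk over the per-MM VMAs. A condensed sketch of the idiom, distilled from the hunks in this commit (kernel-context C, not a standalone program):

    #include <linux/rbtree.h>
    #include <linux/mm_types.h>

    /* walk every VMA of an mm in ascending address order; callers
     * should hold mm->mmap_sem at least for reading */
    static void walk_vmas(struct mm_struct *mm)
    {
            struct rb_node *n;

            for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
                    struct vm_area_struct *vma;

                    vma = rb_entry(n, struct vm_area_struct, vm_rb);
                    /* ... inspect vma->vm_start and vma->vm_end ... */
            }
    }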
arch/frv/kernel/ptrace.c
... | ... | @@ -69,7 +69,8 @@ |
69 | 69 | } |
70 | 70 | |
71 | 71 | /* |
72 | - * check that an address falls within the bounds of the target process's memory mappings | |
72 | + * check that an address falls within the bounds of the target process's memory | |
73 | + * mappings | |
73 | 74 | */ |
74 | 75 | static inline int is_user_addr_valid(struct task_struct *child, |
75 | 76 | unsigned long start, unsigned long len) |
76 | 77 | |
... | ... | @@ -79,11 +80,11 @@ |
79 | 80 | return -EIO; |
80 | 81 | return 0; |
81 | 82 | #else |
82 | - struct vm_list_struct *vml; | |
83 | + struct vm_area_struct *vma; | |
83 | 84 | |
84 | - for (vml = child->mm->context.vmlist; vml; vml = vml->next) | |
85 | - if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end) | |
86 | - return 0; | |
85 | + vma = find_vma(child->mm, start); | |
86 | + if (vma && start >= vma->vm_start && start + len <= vma->vm_end) | |
87 | + return 0; | |
87 | 88 | |
88 | 89 | return -EIO; |
89 | 90 | #endif |
arch/h8300/include/asm/mmu.h
arch/m68knommu/include/asm/mmu.h
arch/sh/include/asm/mmu.h
fs/binfmt_elf_fdpic.c
... | ... | @@ -168,9 +168,6 @@ |
168 | 168 | struct elf_fdpic_params exec_params, interp_params; |
169 | 169 | struct elf_phdr *phdr; |
170 | 170 | unsigned long stack_size, entryaddr; |
171 | -#ifndef CONFIG_MMU | |
172 | - unsigned long fullsize; | |
173 | -#endif | |
174 | 171 | #ifdef ELF_FDPIC_PLAT_INIT |
175 | 172 | unsigned long dynaddr; |
176 | 173 | #endif |
... | ... | @@ -390,11 +387,6 @@ |
390 | 387 | goto error_kill; |
391 | 388 | } |
392 | 389 | |
393 | - /* expand the stack mapping to use up the entire allocation granule */ | |
394 | - fullsize = kobjsize((char *) current->mm->start_brk); | |
395 | - if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, | |
396 | - fullsize, 0, 0))) | |
397 | - stack_size = fullsize; | |
398 | 390 | up_write(¤t->mm->mmap_sem); |
399 | 391 | |
400 | 392 | current->mm->brk = current->mm->start_brk; |
401 | 393 | |
... | ... | @@ -1567,11 +1559,9 @@ |
1567 | 1559 | static int elf_fdpic_dump_segments(struct file *file, size_t *size, |
1568 | 1560 | unsigned long *limit, unsigned long mm_flags) |
1569 | 1561 | { |
1570 | - struct vm_list_struct *vml; | |
1562 | + struct vm_area_struct *vma; | |
1571 | 1563 | |
1572 | - for (vml = current->mm->context.vmlist; vml; vml = vml->next) { | |
1573 | - struct vm_area_struct *vma = vml->vma; | |
1574 | - | |
1564 | + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { | |
1575 | 1565 | if (!maydump(vma, mm_flags)) |
1576 | 1566 | continue; |
1577 | 1567 | |
... | ... | @@ -1617,9 +1607,6 @@ |
1617 | 1607 | elf_fpxregset_t *xfpu = NULL; |
1618 | 1608 | #endif |
1619 | 1609 | int thread_status_size = 0; |
1620 | -#ifndef CONFIG_MMU | |
1621 | - struct vm_list_struct *vml; | |
1622 | -#endif | |
1623 | 1610 | elf_addr_t *auxv; |
1624 | 1611 | unsigned long mm_flags; |
1625 | 1612 | |
1626 | 1613 | |
... | ... | @@ -1685,13 +1672,7 @@ |
1685 | 1672 | fill_prstatus(prstatus, current, signr); |
1686 | 1673 | elf_core_copy_regs(&prstatus->pr_reg, regs); |
1687 | 1674 | |
1688 | -#ifdef CONFIG_MMU | |
1689 | 1675 | segs = current->mm->map_count; |
1690 | -#else | |
1691 | - segs = 0; | |
1692 | - for (vml = current->mm->context.vmlist; vml; vml = vml->next) | |
1693 | - segs++; | |
1694 | -#endif | |
1695 | 1676 | #ifdef ELF_CORE_EXTRA_PHDRS |
1696 | 1677 | segs += ELF_CORE_EXTRA_PHDRS; |
1697 | 1678 | #endif |
1698 | 1679 | |
... | ... | @@ -1766,19 +1747,9 @@ |
1766 | 1747 | mm_flags = current->mm->flags; |
1767 | 1748 | |
1768 | 1749 | /* write program headers for segments dump */ |
1769 | - for ( | |
1770 | -#ifdef CONFIG_MMU | |
1771 | - vma = current->mm->mmap; vma; vma = vma->vm_next | |
1772 | -#else | |
1773 | - vml = current->mm->context.vmlist; vml; vml = vml->next | |
1774 | -#endif | |
1775 | - ) { | |
1750 | + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { | |
1776 | 1751 | struct elf_phdr phdr; |
1777 | 1752 | size_t sz; |
1778 | - | |
1779 | -#ifndef CONFIG_MMU | |
1780 | - vma = vml->vma; | |
1781 | -#endif | |
1782 | 1753 | |
1783 | 1754 | sz = vma->vm_end - vma->vm_start; |
1784 | 1755 |
fs/binfmt_flat.c
... | ... | @@ -417,8 +417,8 @@ |
417 | 417 | unsigned long textpos = 0, datapos = 0, result; |
418 | 418 | unsigned long realdatastart = 0; |
419 | 419 | unsigned long text_len, data_len, bss_len, stack_len, flags; |
420 | - unsigned long len, reallen, memp = 0; | |
421 | - unsigned long extra, rlim; | |
420 | + unsigned long len, memp = 0; | |
421 | + unsigned long memp_size, extra, rlim; | |
422 | 422 | unsigned long *reloc = 0, *rp; |
423 | 423 | struct inode *inode; |
424 | 424 | int i, rev, relocs = 0; |
425 | 425 | |
... | ... | @@ -543,17 +543,10 @@ |
543 | 543 | } |
544 | 544 | |
545 | 545 | len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); |
546 | + len = PAGE_ALIGN(len); | |
546 | 547 | down_write(¤t->mm->mmap_sem); |
547 | 548 | realdatastart = do_mmap(0, 0, len, |
548 | 549 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); |
549 | - /* Remap to use all availabe slack region space */ | |
550 | - if (realdatastart && (realdatastart < (unsigned long)-4096)) { | |
551 | - reallen = kobjsize((void *)realdatastart); | |
552 | - if (reallen > len) { | |
553 | - realdatastart = do_mremap(realdatastart, len, | |
554 | - reallen, MREMAP_FIXED, realdatastart); | |
555 | - } | |
556 | - } | |
557 | 550 | up_write(¤t->mm->mmap_sem); |
558 | 551 | |
559 | 552 | if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { |
560 | 553 | |
561 | 554 | |
... | ... | @@ -591,21 +584,14 @@ |
591 | 584 | |
592 | 585 | reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); |
593 | 586 | memp = realdatastart; |
594 | - | |
587 | + memp_size = len; | |
595 | 588 | } else { |
596 | 589 | |
597 | 590 | len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); |
591 | + len = PAGE_ALIGN(len); | |
598 | 592 | down_write(¤t->mm->mmap_sem); |
599 | 593 | textpos = do_mmap(0, 0, len, |
600 | 594 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); |
601 | - /* Remap to use all availabe slack region space */ | |
602 | - if (textpos && (textpos < (unsigned long) -4096)) { | |
603 | - reallen = kobjsize((void *)textpos); | |
604 | - if (reallen > len) { | |
605 | - textpos = do_mremap(textpos, len, reallen, | |
606 | - MREMAP_FIXED, textpos); | |
607 | - } | |
608 | - } | |
609 | 595 | up_write(¤t->mm->mmap_sem); |
610 | 596 | |
611 | 597 | if (!textpos || textpos >= (unsigned long) -4096) { |
... | ... | @@ -622,7 +608,7 @@ |
622 | 608 | reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + |
623 | 609 | MAX_SHARED_LIBS * sizeof(unsigned long)); |
624 | 610 | memp = textpos; |
625 | - | |
611 | + memp_size = len; | |
626 | 612 | #ifdef CONFIG_BINFMT_ZFLAT |
627 | 613 | /* |
628 | 614 | * load it all in and treat it like a RAM load from now on |
629 | 615 | |
... | ... | @@ -680,10 +666,12 @@ |
680 | 666 | * set up the brk stuff, uses any slack left in data/bss/stack |
681 | 667 | * allocation. We put the brk after the bss (between the bss |
682 | 668 | * and stack) like other platforms. |
669 | + * Userspace code relies on the stack pointer starting out at | |
670 | + * an address right at the end of a page. | |
683 | 671 | */ |
684 | 672 | current->mm->start_brk = datapos + data_len + bss_len; |
685 | 673 | current->mm->brk = (current->mm->start_brk + 3) & ~3; |
686 | - current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len; | |
674 | + current->mm->context.end_brk = memp + memp_size - stack_len; | |
687 | 675 | } |
688 | 676 | |
689 | 677 | if (flags & FLAT_FLAG_KTRACE) |
... | ... | @@ -790,8 +778,8 @@ |
790 | 778 | |
791 | 779 | /* zero the BSS, BRK and stack areas */ |
792 | 780 | memset((void*)(datapos + data_len), 0, bss_len + |
793 | - (memp + kobjsize((void *) memp) - stack_len - /* end brk */ | |
794 | - libinfo->lib_list[id].start_brk) + /* start brk */ | |
781 | + (memp + memp_size - stack_len - /* end brk */ | |
782 | + libinfo->lib_list[id].start_brk) + /* start brk */ | |
795 | 783 | stack_len); |
796 | 784 | |
797 | 785 | return 0; |
fs/proc/internal.h
fs/proc/meminfo.c
... | ... | @@ -74,6 +74,9 @@ |
74 | 74 | "LowTotal: %8lu kB\n" |
75 | 75 | "LowFree: %8lu kB\n" |
76 | 76 | #endif |
77 | +#ifndef CONFIG_MMU | |
78 | + "MmapCopy: %8lu kB\n" | |
79 | +#endif | |
77 | 80 | "SwapTotal: %8lu kB\n" |
78 | 81 | "SwapFree: %8lu kB\n" |
79 | 82 | "Dirty: %8lu kB\n" |
... | ... | @@ -115,6 +118,9 @@ |
115 | 118 | K(i.freehigh), |
116 | 119 | K(i.totalram-i.totalhigh), |
117 | 120 | K(i.freeram-i.freehigh), |
121 | +#endif | |
122 | +#ifndef CONFIG_MMU | |
123 | + K((unsigned long) atomic_read(&mmap_pages_allocated)), | |
118 | 124 | #endif |
119 | 125 | K(i.totalswap), |
120 | 126 | K(i.freeswap), |
fs/proc/nommu.c
... | ... | @@ -33,33 +33,33 @@ |
33 | 33 | #include "internal.h" |
34 | 34 | |
35 | 35 | /* |
36 | - * display a single VMA to a sequenced file | |
36 | + * display a single region to a sequenced file | |
37 | 37 | */ |
38 | -int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |
38 | +static int nommu_region_show(struct seq_file *m, struct vm_region *region) | |
39 | 39 | { |
40 | 40 | unsigned long ino = 0; |
41 | 41 | struct file *file; |
42 | 42 | dev_t dev = 0; |
43 | 43 | int flags, len; |
44 | 44 | |
45 | - flags = vma->vm_flags; | |
46 | - file = vma->vm_file; | |
45 | + flags = region->vm_flags; | |
46 | + file = region->vm_file; | |
47 | 47 | |
48 | 48 | if (file) { |
49 | - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
49 | + struct inode *inode = region->vm_file->f_path.dentry->d_inode; | |
50 | 50 | dev = inode->i_sb->s_dev; |
51 | 51 | ino = inode->i_ino; |
52 | 52 | } |
53 | 53 | |
54 | 54 | seq_printf(m, |
55 | 55 | "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
56 | - vma->vm_start, | |
57 | - vma->vm_end, | |
56 | + region->vm_start, | |
57 | + region->vm_end, | |
58 | 58 | flags & VM_READ ? 'r' : '-', |
59 | 59 | flags & VM_WRITE ? 'w' : '-', |
60 | 60 | flags & VM_EXEC ? 'x' : '-', |
61 | 61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
62 | - ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, | |
62 | + ((loff_t)region->vm_pgoff) << PAGE_SHIFT, | |
63 | 63 | MAJOR(dev), MINOR(dev), ino, &len); |
64 | 64 | |
65 | 65 | if (file) { |
... | ... | @@ -75,61 +75,54 @@ |
75 | 75 | } |
76 | 76 | |
77 | 77 | /* |
78 | - * display a list of all the VMAs the kernel knows about | |
78 | + * display a list of all the REGIONs the kernel knows about | |
79 | 79 | * - nommu kernels have a single flat list |
80 | 80 | */ |
81 | -static int nommu_vma_list_show(struct seq_file *m, void *v) | |
81 | +static int nommu_region_list_show(struct seq_file *m, void *_p) | |
82 | 82 | { |
83 | - struct vm_area_struct *vma; | |
83 | + struct rb_node *p = _p; | |
84 | 84 | |
85 | - vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); | |
86 | - return nommu_vma_show(m, vma); | |
85 | + return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb)); | |
87 | 86 | } |
88 | 87 | |
89 | -static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) | |
88 | +static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos) | |
90 | 89 | { |
91 | - struct rb_node *_rb; | |
90 | + struct rb_node *p; | |
92 | 91 | loff_t pos = *_pos; |
93 | - void *next = NULL; | |
94 | 92 | |
95 | - down_read(&nommu_vma_sem); | |
93 | + down_read(&nommu_region_sem); | |
96 | 94 | |
97 | - for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { | |
98 | - if (pos == 0) { | |
99 | - next = _rb; | |
100 | - break; | |
101 | - } | |
102 | - pos--; | |
103 | - } | |
104 | - | |
105 | - return next; | |
95 | + for (p = rb_first(&nommu_region_tree); p; p = rb_next(p)) | |
96 | + if (pos-- == 0) | |
97 | + return p; | |
98 | + return NULL; | |
106 | 99 | } |
107 | 100 | |
108 | -static void nommu_vma_list_stop(struct seq_file *m, void *v) | |
101 | +static void nommu_region_list_stop(struct seq_file *m, void *v) | |
109 | 102 | { |
110 | - up_read(&nommu_vma_sem); | |
103 | + up_read(&nommu_region_sem); | |
111 | 104 | } |
112 | 105 | |
113 | -static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) | |
106 | +static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos) | |
114 | 107 | { |
115 | 108 | (*pos)++; |
116 | 109 | return rb_next((struct rb_node *) v); |
117 | 110 | } |
118 | 111 | |
119 | -static const struct seq_operations proc_nommu_vma_list_seqop = { | |
120 | - .start = nommu_vma_list_start, | |
121 | - .next = nommu_vma_list_next, | |
122 | - .stop = nommu_vma_list_stop, | |
123 | - .show = nommu_vma_list_show | |
112 | +static struct seq_operations proc_nommu_region_list_seqop = { | |
113 | + .start = nommu_region_list_start, | |
114 | + .next = nommu_region_list_next, | |
115 | + .stop = nommu_region_list_stop, | |
116 | + .show = nommu_region_list_show | |
124 | 117 | }; |
125 | 118 | |
126 | -static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) | |
119 | +static int proc_nommu_region_list_open(struct inode *inode, struct file *file) | |
127 | 120 | { |
128 | - return seq_open(file, &proc_nommu_vma_list_seqop); | |
121 | + return seq_open(file, &proc_nommu_region_list_seqop); | |
129 | 122 | } |
130 | 123 | |
131 | -static const struct file_operations proc_nommu_vma_list_operations = { | |
132 | - .open = proc_nommu_vma_list_open, | |
124 | +static const struct file_operations proc_nommu_region_list_operations = { | |
125 | + .open = proc_nommu_region_list_open, | |
133 | 126 | .read = seq_read, |
134 | 127 | .llseek = seq_lseek, |
135 | 128 | .release = seq_release, |
... | ... | @@ -137,7 +130,7 @@ |
137 | 130 | |
138 | 131 | static int __init proc_nommu_init(void) |
139 | 132 | { |
140 | - proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); | |
133 | + proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations); | |
141 | 134 | return 0; |
142 | 135 | } |
143 | 136 |
fs/proc/task_nommu.c
... | ... | @@ -15,25 +15,32 @@ |
15 | 15 | */ |
16 | 16 | void task_mem(struct seq_file *m, struct mm_struct *mm) |
17 | 17 | { |
18 | - struct vm_list_struct *vml; | |
19 | - unsigned long bytes = 0, sbytes = 0, slack = 0; | |
18 | + struct vm_area_struct *vma; | |
19 | + struct vm_region *region; | |
20 | + struct rb_node *p; | |
21 | + unsigned long bytes = 0, sbytes = 0, slack = 0, size; | |
20 | 22 | |
21 | 23 | down_read(&mm->mmap_sem); |
22 | - for (vml = mm->context.vmlist; vml; vml = vml->next) { | |
23 | - if (!vml->vma) | |
24 | - continue; | |
24 | + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { | |
25 | + vma = rb_entry(p, struct vm_area_struct, vm_rb); | |
25 | 26 | |
26 | - bytes += kobjsize(vml); | |
27 | + bytes += kobjsize(vma); | |
28 | + | |
29 | + region = vma->vm_region; | |
30 | + if (region) { | |
31 | + size = kobjsize(region); | |
32 | + size += region->vm_end - region->vm_start; | |
33 | + } else { | |
34 | + size = vma->vm_end - vma->vm_start; | |
35 | + } | |
36 | + | |
27 | 37 | if (atomic_read(&mm->mm_count) > 1 || |
28 | - atomic_read(&vml->vma->vm_usage) > 1 | |
29 | - ) { | |
30 | - sbytes += kobjsize((void *) vml->vma->vm_start); | |
31 | - sbytes += kobjsize(vml->vma); | |
38 | + vma->vm_flags & VM_MAYSHARE) { | |
39 | + sbytes += size; | |
32 | 40 | } else { |
33 | - bytes += kobjsize((void *) vml->vma->vm_start); | |
34 | - bytes += kobjsize(vml->vma); | |
35 | - slack += kobjsize((void *) vml->vma->vm_start) - | |
36 | - (vml->vma->vm_end - vml->vma->vm_start); | |
41 | + bytes += size; | |
42 | + if (region) | |
43 | + slack = region->vm_end - vma->vm_end; | |
37 | 44 | } |
38 | 45 | } |
39 | 46 | |
40 | 47 | |
... | ... | @@ -70,13 +77,14 @@ |
70 | 77 | |
71 | 78 | unsigned long task_vsize(struct mm_struct *mm) |
72 | 79 | { |
73 | - struct vm_list_struct *tbp; | |
80 | + struct vm_area_struct *vma; | |
81 | + struct rb_node *p; | |
74 | 82 | unsigned long vsize = 0; |
75 | 83 | |
76 | 84 | down_read(&mm->mmap_sem); |
77 | - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { | |
78 | - if (tbp->vma) | |
79 | - vsize += kobjsize((void *) tbp->vma->vm_start); | |
85 | + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { | |
86 | + vma = rb_entry(p, struct vm_area_struct, vm_rb); | |
87 | + vsize += vma->vm_end - vma->vm_start; | |
80 | 88 | } |
81 | 89 | up_read(&mm->mmap_sem); |
82 | 90 | return vsize; |
83 | 91 | |
... | ... | @@ -85,15 +93,19 @@ |
85 | 93 | int task_statm(struct mm_struct *mm, int *shared, int *text, |
86 | 94 | int *data, int *resident) |
87 | 95 | { |
88 | - struct vm_list_struct *tbp; | |
96 | + struct vm_area_struct *vma; | |
97 | + struct vm_region *region; | |
98 | + struct rb_node *p; | |
89 | 99 | int size = kobjsize(mm); |
90 | 100 | |
91 | 101 | down_read(&mm->mmap_sem); |
92 | - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { | |
93 | - size += kobjsize(tbp); | |
94 | - if (tbp->vma) { | |
95 | - size += kobjsize(tbp->vma); | |
96 | - size += kobjsize((void *) tbp->vma->vm_start); | |
102 | + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { | |
103 | + vma = rb_entry(p, struct vm_area_struct, vm_rb); | |
104 | + size += kobjsize(vma); | |
105 | + region = vma->vm_region; | |
106 | + if (region) { | |
107 | + size += kobjsize(region); | |
108 | + size += region->vm_end - region->vm_start; | |
97 | 109 | } |
98 | 110 | } |
... | ... | @@ -105,20 +117,62 @@ |
105 | 117 | } |
106 | 118 | |
107 | 119 | /* |
120 | + * display a single VMA to a sequenced file | |
121 | + */ | |
122 | +static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |
123 | +{ | |
124 | + unsigned long ino = 0; | |
125 | + struct file *file; | |
126 | + dev_t dev = 0; | |
127 | + int flags, len; | |
128 | + | |
129 | + flags = vma->vm_flags; | |
130 | + file = vma->vm_file; | |
131 | + | |
132 | + if (file) { | |
133 | + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | |
134 | + dev = inode->i_sb->s_dev; | |
135 | + ino = inode->i_ino; | |
136 | + } | |
137 | + | |
138 | + seq_printf(m, | |
139 | + "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | |
140 | + vma->vm_start, | |
141 | + vma->vm_end, | |
142 | + flags & VM_READ ? 'r' : '-', | |
143 | + flags & VM_WRITE ? 'w' : '-', | |
144 | + flags & VM_EXEC ? 'x' : '-', | |
145 | + flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | |
146 | + vma->vm_pgoff << PAGE_SHIFT, | |
147 | + MAJOR(dev), MINOR(dev), ino, &len); | |
148 | + | |
149 | + if (file) { | |
150 | + len = 25 + sizeof(void *) * 6 - len; | |
151 | + if (len < 1) | |
152 | + len = 1; | |
153 | + seq_printf(m, "%*c", len, ' '); | |
154 | + seq_path(m, &file->f_path, ""); | |
155 | + } | |
156 | + | |
157 | + seq_putc(m, '\n'); | |
158 | + return 0; | |
159 | +} | |
160 | + | |
161 | +/* | |
108 | 162 | * display mapping lines for a particular process's /proc/pid/maps |
109 | 163 | */ |
110 | -static int show_map(struct seq_file *m, void *_vml) | |
164 | +static int show_map(struct seq_file *m, void *_p) | |
111 | 165 | { |
112 | - struct vm_list_struct *vml = _vml; | |
166 | + struct rb_node *p = _p; | |
113 | 167 | |
114 | - return nommu_vma_show(m, vml->vma); | |
168 | + return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); | |
115 | 169 | } |
116 | 170 | |
117 | 171 | static void *m_start(struct seq_file *m, loff_t *pos) |
118 | 172 | { |
119 | 173 | struct proc_maps_private *priv = m->private; |
120 | - struct vm_list_struct *vml; | |
121 | 174 | struct mm_struct *mm; |
175 | + struct rb_node *p; | |
122 | 176 | loff_t n = *pos; |
123 | 177 | |
124 | 178 | /* pin the task and mm whilst we play with them */ |
125 | 179 | |
... | ... | @@ -134,9 +188,9 @@ |
134 | 188 | } |
135 | 189 | |
136 | 190 | /* start from the Nth VMA */ |
137 | - for (vml = mm->context.vmlist; vml; vml = vml->next) | |
191 | + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) | |
138 | 192 | if (n-- == 0) |
139 | - return vml; | |
193 | + return p; | |
140 | 194 | return NULL; |
141 | 195 | } |
142 | 196 | |
143 | 197 | |
144 | 198 | |
... | ... | @@ -152,12 +206,12 @@ |
152 | 206 | } |
153 | 207 | } |
154 | 208 | |
155 | -static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) | |
209 | +static void *m_next(struct seq_file *m, void *_p, loff_t *pos) | |
156 | 210 | { |
157 | - struct vm_list_struct *vml = _vml; | |
211 | + struct rb_node *p = _p; | |
158 | 212 | |
159 | 213 | (*pos)++; |
160 | - return vml ? vml->next : NULL; | |
214 | + return p ? rb_next(p) : NULL; | |
161 | 215 | } |
162 | 216 | |
163 | 217 | static const struct seq_operations proc_pid_maps_ops = { |
fs/ramfs/file-nommu.c
... | ... | @@ -262,11 +262,11 @@ |
262 | 262 | ret = -ENOMEM; |
263 | 263 | pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); |
264 | 264 | if (!pages) |
265 | - goto out; | |
265 | + goto out_free; | |
266 | 266 | |
267 | 267 | nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages); |
268 | 268 | if (nr != lpages) |
269 | - goto out; /* leave if some pages were missing */ | |
269 | + goto out_free_pages; /* leave if some pages were missing */ | |
270 | 270 | |
271 | 271 | /* check the pages for physical adjacency */ |
272 | 272 | ptr = pages; |
273 | 273 | |
... | ... | @@ -274,19 +274,18 @@ |
274 | 274 | page++; |
275 | 275 | for (loop = lpages; loop > 1; loop--) |
276 | 276 | if (*ptr++ != page++) |
277 | - goto out; | |
277 | + goto out_free_pages; | |
278 | 278 | |
279 | 279 | /* okay - all conditions fulfilled */ |
280 | 280 | ret = (unsigned long) page_address(pages[0]); |
281 | 281 | |
282 | - out: | |
283 | - if (pages) { | |
284 | - ptr = pages; | |
285 | - for (loop = lpages; loop > 0; loop--) | |
286 | - put_page(*ptr++); | |
287 | - kfree(pages); | |
288 | - } | |
289 | - | |
282 | +out_free_pages: | |
283 | + ptr = pages; | |
284 | + for (loop = nr; loop > 0; loop--) | |
285 | + put_page(*ptr++); | |
286 | +out_free: | |
287 | + kfree(pages); | |
288 | +out: | |
290 | 289 | return ret; |
291 | 290 | } |
292 | 291 |
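The fix above splits a single catch-all label into staged labels so that only pages actually returned by find_get_pages() are released and the array is freed exactly once. The general unwind pattern, as a standalone sketch (illustrative C using plain malloc/free, not kernel code):

    #include <stdlib.h>

    /* staged unwind: each label releases exactly what had been
     * acquired before the failure point */
    static int do_setup(void)
    {
            int ret = -1;
            char *a, *b;

            a = malloc(64);
            if (!a)
                    goto out;
            b = malloc(64);
            if (!b)
                    goto out_free_a;

            ret = 0;        /* ... real work using a and b ... */

            free(b);
    out_free_a:
            free(a);
    out:
            return ret;
    }

    int main(void)
    {
            return do_setup() ? 1 : 0;
    }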
include/asm-frv/mmu.h
include/asm-m32r/mmu.h
include/linux/mm.h
... | ... | @@ -56,19 +56,9 @@ |
56 | 56 | |
57 | 57 | extern struct kmem_cache *vm_area_cachep; |
58 | 58 | |
59 | -/* | |
60 | - * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is | |
61 | - * disabled, then there's a single shared list of VMAs maintained by the | |
62 | - * system, and mm's subscribe to these individually | |
63 | - */ | |
64 | -struct vm_list_struct { | |
65 | - struct vm_list_struct *next; | |
66 | - struct vm_area_struct *vma; | |
67 | -}; | |
68 | - | |
69 | 59 | #ifndef CONFIG_MMU |
70 | -extern struct rb_root nommu_vma_tree; | |
71 | -extern struct rw_semaphore nommu_vma_sem; | |
60 | +extern struct rb_root nommu_region_tree; | |
61 | +extern struct rw_semaphore nommu_region_sem; | |
72 | 62 | |
73 | 63 | extern unsigned int kobjsize(const void *objp); |
74 | 64 | #endif |
... | ... | @@ -1061,6 +1051,7 @@ |
1061 | 1051 | unsigned long, enum memmap_context); |
1062 | 1052 | extern void setup_per_zone_pages_min(void); |
1063 | 1053 | extern void mem_init(void); |
1054 | +extern void __init mmap_init(void); | |
1064 | 1055 | extern void show_mem(void); |
1065 | 1056 | extern void si_meminfo(struct sysinfo * val); |
1066 | 1057 | extern void si_meminfo_node(struct sysinfo *val, int nid); |
... | ... | @@ -1071,6 +1062,9 @@ |
1071 | 1062 | #else |
1072 | 1063 | static inline void setup_per_cpu_pageset(void) {} |
1073 | 1064 | #endif |
1065 | + | |
1066 | +/* nommu.c */ | |
1067 | +extern atomic_t mmap_pages_allocated; | |
1074 | 1068 | |
1075 | 1069 | /* prio_tree.c */ |
1076 | 1070 | void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); |
include/linux/mm_types.h
... | ... | @@ -97,6 +97,23 @@ |
97 | 97 | }; |
98 | 98 | |
99 | 99 | /* |
100 | + * A region containing a mapping of a non-memory backed file under NOMMU | |
101 | + * conditions. These are held in a global tree and are pinned by the VMAs that | |
102 | + * map parts of them. | |
103 | + */ | |
104 | +struct vm_region { | |
105 | + struct rb_node vm_rb; /* link in global region tree */ | |
106 | + unsigned long vm_flags; /* VMA vm_flags */ | |
107 | + unsigned long vm_start; /* start address of region */ | |
108 | + unsigned long vm_end; /* region initialised to here */ | |
109 | + unsigned long vm_top; /* region allocated to here */ | |
110 | + unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ | |
111 | + struct file *vm_file; /* the backing file or NULL */ | |
112 | + | |
113 | + atomic_t vm_usage; /* region usage count */ | |
114 | +}; | |
115 | + | |
116 | +/* | |
100 | 117 | * This struct defines a memory VMM memory area. There is one of these |
101 | 118 | * per VM-area/task. A VM area is any part of the process virtual memory |
102 | 119 | * space that has a special rule for the page-fault handlers (ie a shared |
... | ... | @@ -152,7 +169,7 @@ |
152 | 169 | unsigned long vm_truncate_count;/* truncate_count or restart_addr */ |
153 | 170 | |
154 | 171 | #ifndef CONFIG_MMU |
155 | - atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */ | |
172 | + struct vm_region *vm_region; /* NOMMU mapping region */ | |
156 | 173 | #endif |
157 | 174 | #ifdef CONFIG_NUMA |
158 | 175 | struct mempolicy *vm_policy; /* NUMA policy for the VMA */ |
init/initramfs.c
ipc/shm.c
... | ... | @@ -990,6 +990,7 @@ |
990 | 990 | */ |
991 | 991 | vma = find_vma(mm, addr); |
992 | 992 | |
993 | +#ifdef CONFIG_MMU | |
993 | 994 | while (vma) { |
994 | 995 | next = vma->vm_next; |
995 | 996 | |
... | ... | @@ -1033,6 +1034,17 @@ |
1033 | 1034 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); |
1034 | 1035 | vma = next; |
1035 | 1036 | } |
1037 | + | |
1038 | +#else /* CONFIG_MMU */ | |
1039 | + /* under NOMMU conditions, the exact address to be destroyed must be | |
1040 | + * given */ | |
1041 | + retval = -EINVAL; | |
1042 | + if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { | |
1043 | + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); | |
1044 | + retval = 0; | |
1045 | + } | |
1046 | + | |
1047 | +#endif | |
1036 | 1048 | |
1037 | 1049 | up_write(&mm->mmap_sem); |
1038 | 1050 | return retval; |
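The NOMMU branch added above means a SYS V shared memory detach must name the exact attach address. A small user-space illustration (standard shmget()/shmat()/shmdt() API, not part of this commit):

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
            int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
            void *p;

            if (id < 0)
                    return 1;

            p = shmat(id, NULL, 0);
            if (p == (void *) -1)
                    return 1;

            /* under NOMMU only the exact address returned by shmat() is
             * accepted; a mid-segment address fails with EINVAL */
            if (shmdt(p) < 0)
                    perror("shmdt");

            shmctl(id, IPC_RMID, NULL);
            return 0;
    }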
kernel/fork.c
... | ... | @@ -1481,12 +1481,10 @@ |
1481 | 1481 | fs_cachep = kmem_cache_create("fs_cache", |
1482 | 1482 | sizeof(struct fs_struct), 0, |
1483 | 1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1484 | - vm_area_cachep = kmem_cache_create("vm_area_struct", | |
1485 | - sizeof(struct vm_area_struct), 0, | |
1486 | - SLAB_PANIC, NULL); | |
1487 | 1484 | mm_cachep = kmem_cache_create("mm_struct", |
1488 | 1485 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1489 | 1486 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1487 | + mmap_init(); | |
1490 | 1488 | } |
1491 | 1489 | |
1492 | 1490 | /* |
kernel/sysctl.c
... | ... | @@ -82,6 +82,9 @@ |
82 | 82 | extern int compat_log; |
83 | 83 | extern int latencytop_enabled; |
84 | 84 | extern int sysctl_nr_open_min, sysctl_nr_open_max; |
85 | +#ifndef CONFIG_MMU | |
86 | +extern int sysctl_nr_trim_pages; | |
87 | +#endif | |
85 | 88 | #ifdef CONFIG_RCU_TORTURE_TEST |
86 | 89 | extern int rcutorture_runnable; |
87 | 90 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
... | ... | @@ -1101,6 +1104,17 @@ |
1101 | 1104 | .maxlen = sizeof(sysctl_max_map_count), |
1102 | 1105 | .mode = 0644, |
1103 | 1106 | .proc_handler = &proc_dointvec |
1107 | + }, | |
1108 | +#else | |
1109 | + { | |
1110 | + .ctl_name = CTL_UNNUMBERED, | |
1111 | + .procname = "nr_trim_pages", | |
1112 | + .data = &sysctl_nr_trim_pages, | |
1113 | + .maxlen = sizeof(sysctl_nr_trim_pages), | |
1114 | + .mode = 0644, | |
1115 | + .proc_handler = &proc_dointvec_minmax, | |
1116 | + .strategy = &sysctl_intvec, | |
1117 | + .extra1 = &zero, | |
1104 | 1118 | }, |
1105 | 1119 | #endif |
1106 | 1120 | { |
lib/Kconfig.debug
... | ... | @@ -512,6 +512,13 @@ |
512 | 512 | |
513 | 513 | If unsure, say N. |
514 | 514 | |
515 | +config DEBUG_NOMMU_REGIONS | |
516 | + bool "Debug the global anon/private NOMMU mapping region tree" | |
517 | + depends on DEBUG_KERNEL && !MMU | |
518 | + help | |
519 | + This option causes the global tree of anonymous and private mapping | |
520 | + regions to be regularly checked for invalid topology. | |
521 | + | |
515 | 522 | config DEBUG_WRITECOUNT |
516 | 523 | bool "Debug filesystem writers count" |
517 | 524 | depends on DEBUG_KERNEL |
mm/mmap.c
... | ... | @@ -2472,4 +2472,14 @@ |
2472 | 2472 | |
2473 | 2473 | mutex_unlock(&mm_all_locks_mutex); |
2474 | 2474 | } |
2475 | + | |
2476 | +/* | |
2477 | + * initialise the VMA slab | |
2478 | + */ | |
2479 | +void __init mmap_init(void) | |
2480 | +{ | |
2481 | + vm_area_cachep = kmem_cache_create("vm_area_struct", | |
2482 | + sizeof(struct vm_area_struct), 0, | |
2483 | + SLAB_PANIC, NULL); | |
2484 | +} |
mm/nommu.c
... | ... | @@ -6,11 +6,11 @@ |
6 | 6 | * |
7 | 7 | * See Documentation/nommu-mmap.txt |
8 | 8 | * |
9 | - * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com> | |
9 | + * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> | |
10 | 10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> |
11 | 11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> |
12 | 12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> |
13 | - * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org> | |
13 | + * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org> | |
14 | 14 | */ |
15 | 15 | |
16 | 16 | #include <linux/module.h> |
... | ... | @@ -33,26 +33,51 @@ |
33 | 33 | #include <asm/uaccess.h> |
34 | 34 | #include <asm/tlb.h> |
35 | 35 | #include <asm/tlbflush.h> |
36 | +#include "internal.h" | |
36 | 37 | |
38 | +static inline __attribute__((format(printf, 1, 2))) | |
39 | +void no_printk(const char *fmt, ...) | |
40 | +{ | |
41 | +} | |
42 | + | |
43 | +#if 0 | |
44 | +#define kenter(FMT, ...) \ | |
45 | + printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) | |
46 | +#define kleave(FMT, ...) \ | |
47 | + printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) | |
48 | +#define kdebug(FMT, ...) \ | |
49 | + printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) | |
50 | +#else | |
51 | +#define kenter(FMT, ...) \ | |
52 | + no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) | |
53 | +#define kleave(FMT, ...) \ | |
54 | + no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) | |
55 | +#define kdebug(FMT, ...) \ | |
56 | + no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) | |
57 | +#endif | |
58 | + | |
37 | 59 | #include "internal.h" |
38 | 60 | |
39 | 61 | void *high_memory; |
40 | 62 | struct page *mem_map; |
41 | 63 | unsigned long max_mapnr; |
42 | 64 | unsigned long num_physpages; |
43 | -unsigned long askedalloc, realalloc; | |
44 | 65 | atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); |
45 | 66 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ |
46 | 67 | int sysctl_overcommit_ratio = 50; /* default is 50% */ |
47 | 68 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; |
69 | +int sysctl_nr_trim_pages = 1; /* page trimming behaviour */ | |
48 | 70 | int heap_stack_gap = 0; |
49 | 71 | |
72 | +atomic_t mmap_pages_allocated; | |
73 | + | |
50 | 74 | EXPORT_SYMBOL(mem_map); |
51 | 75 | EXPORT_SYMBOL(num_physpages); |
52 | 76 | |
53 | -/* list of shareable VMAs */ | |
54 | -struct rb_root nommu_vma_tree = RB_ROOT; | |
55 | -DECLARE_RWSEM(nommu_vma_sem); | |
77 | +/* list of mapped, potentially shareable regions */ | |
78 | +static struct kmem_cache *vm_region_jar; | |
79 | +struct rb_root nommu_region_tree = RB_ROOT; | |
80 | +DECLARE_RWSEM(nommu_region_sem); | |
56 | 81 | |
57 | 82 | struct vm_operations_struct generic_file_vm_ops = { |
58 | 83 | }; |
... | ... | @@ -124,6 +149,20 @@ |
124 | 149 | return ksize(objp); |
125 | 150 | |
126 | 151 | /* |
152 | + * If it's not a compound page, see if we have a matching VMA | |
153 | + * region. This test is intentionally done in reverse order, | |
154 | + * so if there's no VMA, we still fall through and hand back | |
155 | + * PAGE_SIZE for 0-order pages. | |
156 | + */ | |
157 | + if (!PageCompound(page)) { | |
158 | + struct vm_area_struct *vma; | |
159 | + | |
160 | + vma = find_vma(current->mm, (unsigned long)objp); | |
161 | + if (vma) | |
162 | + return vma->vm_end - vma->vm_start; | |
163 | + } | |
164 | + | |
165 | + /* | |
127 | 166 | * The ksize() function is only guaranteed to work for pointers |
128 | 167 | * returned by kmalloc(). So handle arbitrary pointers here. |
129 | 168 | */ |
... | ... | @@ -401,130 +440,179 @@ |
401 | 440 | return mm->brk = brk; |
402 | 441 | } |
403 | 442 | |
404 | -#ifdef DEBUG | |
405 | -static void show_process_blocks(void) | |
443 | +/* | |
444 | + * initialise the VMA and region record slabs | |
445 | + */ | |
446 | +void __init mmap_init(void) | |
406 | 447 | { |
407 | - struct vm_list_struct *vml; | |
408 | - | |
409 | - printk("Process blocks %d:", current->pid); | |
410 | - | |
411 | - for (vml = ¤t->mm->context.vmlist; vml; vml = vml->next) { | |
412 | - printk(" %p: %p", vml, vml->vma); | |
413 | - if (vml->vma) | |
414 | - printk(" (%d @%lx #%d)", | |
415 | - kobjsize((void *) vml->vma->vm_start), | |
416 | - vml->vma->vm_start, | |
417 | - atomic_read(&vml->vma->vm_usage)); | |
418 | - printk(vml->next ? " ->" : ".\n"); | |
419 | - } | |
448 | + vm_region_jar = kmem_cache_create("vm_region_jar", | |
449 | + sizeof(struct vm_region), 0, | |
450 | + SLAB_PANIC, NULL); | |
451 | + vm_area_cachep = kmem_cache_create("vm_area_struct", | |
452 | + sizeof(struct vm_area_struct), 0, | |
453 | + SLAB_PANIC, NULL); | |
420 | 454 | } |
421 | -#endif /* DEBUG */ | |
422 | 455 | |
423 | 456 | /* |
424 | - * add a VMA into a process's mm_struct in the appropriate place in the list | |
425 | - * - should be called with mm->mmap_sem held writelocked | |
457 | + * validate the region tree | |
458 | + * - the caller must hold the region lock | |
426 | 459 | */ |
427 | -static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) | |
460 | +#ifdef CONFIG_DEBUG_NOMMU_REGIONS | |
461 | +static noinline void validate_nommu_regions(void) | |
428 | 462 | { |
429 | - struct vm_list_struct **ppv; | |
463 | + struct vm_region *region, *last; | |
464 | + struct rb_node *p, *lastp; | |
430 | 465 | |
431 | - for (ppv = ¤t->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) | |
432 | - if ((*ppv)->vma->vm_start > vml->vma->vm_start) | |
433 | - break; | |
466 | + lastp = rb_first(&nommu_region_tree); | |
467 | + if (!lastp) | |
468 | + return; | |
434 | 469 | |
435 | - vml->next = *ppv; | |
436 | - *ppv = vml; | |
470 | + last = rb_entry(lastp, struct vm_region, vm_rb); | |
471 | + if (unlikely(last->vm_end <= last->vm_start)) | |
472 | + BUG(); | |
473 | + if (unlikely(last->vm_top < last->vm_end)) | |
474 | + BUG(); | |
475 | + | |
476 | + while ((p = rb_next(lastp))) { | |
477 | + region = rb_entry(p, struct vm_region, vm_rb); | |
478 | + last = rb_entry(lastp, struct vm_region, vm_rb); | |
479 | + | |
480 | + if (unlikely(region->vm_end <= region->vm_start)) | |
481 | + BUG(); | |
482 | + if (unlikely(region->vm_top < region->vm_end)) | |
483 | + BUG(); | |
484 | + if (unlikely(region->vm_start < last->vm_top)) | |
485 | + BUG(); | |
486 | + | |
487 | + lastp = p; | |
488 | + } | |
437 | 489 | } |
490 | +#else | |
491 | +#define validate_nommu_regions() do {} while(0) | |
492 | +#endif | |
438 | 493 | |
439 | 494 | /* |
440 | - * look up the first VMA in which addr resides, NULL if none | |
441 | - * - should be called with mm->mmap_sem at least held readlocked | |
495 | + * add a region into the global tree | |
442 | 496 | */ |
443 | -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) | |
497 | +static void add_nommu_region(struct vm_region *region) | |
444 | 498 | { |
445 | - struct vm_list_struct *loop, *vml; | |
499 | + struct vm_region *pregion; | |
500 | + struct rb_node **p, *parent; | |
446 | 501 | |
447 | - /* search the vm_start ordered list */ | |
448 | - vml = NULL; | |
449 | - for (loop = mm->context.vmlist; loop; loop = loop->next) { | |
450 | - if (loop->vma->vm_start > addr) | |
451 | - break; | |
452 | - vml = loop; | |
502 | + validate_nommu_regions(); | |
503 | + | |
504 | + BUG_ON(region->vm_start & ~PAGE_MASK); | |
505 | + | |
506 | + parent = NULL; | |
507 | + p = &nommu_region_tree.rb_node; | |
508 | + while (*p) { | |
509 | + parent = *p; | |
510 | + pregion = rb_entry(parent, struct vm_region, vm_rb); | |
511 | + if (region->vm_start < pregion->vm_start) | |
512 | + p = &(*p)->rb_left; | |
513 | + else if (region->vm_start > pregion->vm_start) | |
514 | + p = &(*p)->rb_right; | |
515 | + else if (pregion == region) | |
516 | + return; | |
517 | + else | |
518 | + BUG(); | |
453 | 519 | } |
454 | 520 | |
455 | - if (vml && vml->vma->vm_end > addr) | |
456 | - return vml->vma; | |
521 | + rb_link_node(®ion->vm_rb, parent, p); | |
522 | + rb_insert_color(®ion->vm_rb, &nommu_region_tree); | |
457 | 523 | |
458 | - return NULL; | |
524 | + validate_nommu_regions(); | |
459 | 525 | } |
460 | -EXPORT_SYMBOL(find_vma); | |
461 | 526 | |
462 | 527 | /* |
463 | - * find a VMA | |
464 | - * - we don't extend stack VMAs under NOMMU conditions | |
528 | + * delete a region from the global tree | |
465 | 529 | */ |
466 | -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) | |
530 | +static void delete_nommu_region(struct vm_region *region) | |
467 | 531 | { |
468 | - return find_vma(mm, addr); | |
469 | -} | |
532 | + BUG_ON(!nommu_region_tree.rb_node); | |
470 | 533 | |
471 | -int expand_stack(struct vm_area_struct *vma, unsigned long address) | |
472 | -{ | |
473 | - return -ENOMEM; | |
534 | + validate_nommu_regions(); | |
535 | + rb_erase(®ion->vm_rb, &nommu_region_tree); | |
536 | + validate_nommu_regions(); | |
474 | 537 | } |
475 | 538 | |
476 | 539 | /* |
477 | - * look up the first VMA exactly that exactly matches addr | |
478 | - * - should be called with mm->mmap_sem at least held readlocked | |
540 | + * free a contiguous series of pages | |
479 | 541 | */ |
480 | -static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, | |
481 | - unsigned long addr) | |
542 | +static void free_page_series(unsigned long from, unsigned long to) | |
482 | 543 | { |
483 | - struct vm_list_struct *vml; | |
544 | + for (; from < to; from += PAGE_SIZE) { | |
545 | + struct page *page = virt_to_page(from); | |
484 | 546 | |
485 | - /* search the vm_start ordered list */ | |
486 | - for (vml = mm->context.vmlist; vml; vml = vml->next) { | |
487 | - if (vml->vma->vm_start == addr) | |
488 | - return vml->vma; | |
489 | - if (vml->vma->vm_start > addr) | |
490 | - break; | |
547 | + kdebug("- free %lx", from); | |
548 | + atomic_dec(&mmap_pages_allocated); | |
549 | + if (page_count(page) != 1) | |
550 | + kdebug("free page %p [%d]", page, page_count(page)); | |
551 | + put_page(page); | |
491 | 552 | } |
492 | - | |
493 | - return NULL; | |
494 | 553 | } |
495 | 554 | |
496 | 555 | /* |
497 | - * find a VMA in the global tree | |
556 | + * release a reference to a region | |
557 | + * - the caller must hold the region semaphore, which this releases | |
558 | + * - the region may not have been added to the tree yet, in which case vm_top | |
559 | + * will equal vm_start | |
498 | 560 | */ |
499 | -static inline struct vm_area_struct *find_nommu_vma(unsigned long start) | |
561 | +static void __put_nommu_region(struct vm_region *region) | |
562 | + __releases(nommu_region_sem) | |
500 | 563 | { |
501 | - struct vm_area_struct *vma; | |
502 | - struct rb_node *n = nommu_vma_tree.rb_node; | |
564 | + kenter("%p{%d}", region, atomic_read(®ion->vm_usage)); | |
503 | 565 | |
504 | - while (n) { | |
505 | - vma = rb_entry(n, struct vm_area_struct, vm_rb); | |
566 | + BUG_ON(!nommu_region_tree.rb_node); | |
506 | 567 | |
507 | - if (start < vma->vm_start) | |
508 | - n = n->rb_left; | |
509 | - else if (start > vma->vm_start) | |
510 | - n = n->rb_right; | |
511 | - else | |
512 | - return vma; | |
568 | + if (atomic_dec_and_test(®ion->vm_usage)) { | |
569 | + if (region->vm_top > region->vm_start) | |
570 | + delete_nommu_region(region); | |
571 | + up_write(&nommu_region_sem); | |
572 | + | |
573 | + if (region->vm_file) | |
574 | + fput(region->vm_file); | |
575 | + | |
576 | + /* IO memory and memory shared directly out of the pagecache | |
577 | + * from ramfs/tmpfs mustn't be released here */ | |
578 | + if (region->vm_flags & VM_MAPPED_COPY) { | |
579 | + kdebug("free series"); | |
580 | + free_page_series(region->vm_start, region->vm_top); | |
581 | + } | |
582 | + kmem_cache_free(vm_region_jar, region); | |
583 | + } else { | |
584 | + up_write(&nommu_region_sem); | |
513 | 585 | } |
586 | +} | |
514 | 587 | |
515 | - return NULL; | |
588 | +/* | |
589 | + * release a reference to a region | |
590 | + */ | |
591 | +static void put_nommu_region(struct vm_region *region) | |
592 | +{ | |
593 | + down_write(&nommu_region_sem); | |
594 | + __put_nommu_region(region); | |
516 | 595 | } |
517 | 596 | |
518 | 597 | /* |
519 | - * add a VMA in the global tree | |
598 | + * add a VMA into a process's mm_struct in the appropriate place in the list | |
599 | + * and tree and add to the address space's page tree also if not an anonymous | |
600 | + * page | |
601 | + * - should be called with mm->mmap_sem held writelocked | |
520 | 602 | */ |
521 | -static void add_nommu_vma(struct vm_area_struct *vma) | |
603 | +static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) | |
522 | 604 | { |
523 | - struct vm_area_struct *pvma; | |
605 | + struct vm_area_struct *pvma, **pp; | |
524 | 606 | struct address_space *mapping; |
525 | - struct rb_node **p = &nommu_vma_tree.rb_node; | |
526 | - struct rb_node *parent = NULL; | |
607 | + struct rb_node **p, *parent; | |
527 | 608 | |
609 | + kenter(",%p", vma); | |
610 | + | |
611 | + BUG_ON(!vma->vm_region); | |
612 | + | |
613 | + mm->map_count++; | |
614 | + vma->vm_mm = mm; | |
615 | + | |
528 | 616 | /* add the VMA to the mapping */ |
529 | 617 | if (vma->vm_file) { |
530 | 618 | mapping = vma->vm_file->f_mapping; |
... | ... | @@ -534,43 +622,63 @@ |
534 | 622 | flush_dcache_mmap_unlock(mapping); |
535 | 623 | } |
536 | 624 | |
537 | - /* add the VMA to the master list */ | |
625 | + /* add the VMA to the tree */ | |
626 | + parent = NULL; | |
627 | + p = &mm->mm_rb.rb_node; | |
538 | 628 | while (*p) { |
539 | 629 | parent = *p; |
540 | 630 | pvma = rb_entry(parent, struct vm_area_struct, vm_rb); |
541 | 631 | |
542 | - if (vma->vm_start < pvma->vm_start) { | |
632 | + /* sort by: start addr, end addr, VMA struct addr in that order | |
633 | + * (the latter is necessary as we may get identical VMAs) */ | |
634 | + if (vma->vm_start < pvma->vm_start) | |
543 | 635 | p = &(*p)->rb_left; |
544 | - } | |
545 | - else if (vma->vm_start > pvma->vm_start) { | |
636 | + else if (vma->vm_start > pvma->vm_start) | |
546 | 637 | p = &(*p)->rb_right; |
547 | - } | |
548 | - else { | |
549 | - /* mappings are at the same address - this can only | |
550 | - * happen for shared-mem chardevs and shared file | |
551 | - * mappings backed by ramfs/tmpfs */ | |
552 | - BUG_ON(!(pvma->vm_flags & VM_SHARED)); | |
553 | - | |
554 | - if (vma < pvma) | |
555 | - p = &(*p)->rb_left; | |
556 | - else if (vma > pvma) | |
557 | - p = &(*p)->rb_right; | |
558 | - else | |
559 | - BUG(); | |
560 | - } | |
638 | + else if (vma->vm_end < pvma->vm_end) | |
639 | + p = &(*p)->rb_left; | |
640 | + else if (vma->vm_end > pvma->vm_end) | |
641 | + p = &(*p)->rb_right; | |
642 | + else if (vma < pvma) | |
643 | + p = &(*p)->rb_left; | |
644 | + else if (vma > pvma) | |
645 | + p = &(*p)->rb_right; | |
646 | + else | |
647 | + BUG(); | |
561 | 648 | } |
562 | 649 | |
563 | 650 | rb_link_node(&vma->vm_rb, parent, p); |
564 | - rb_insert_color(&vma->vm_rb, &nommu_vma_tree); | |
651 | + rb_insert_color(&vma->vm_rb, &mm->mm_rb); | |
652 | + | |
653 | + /* add VMA to the VMA list also */ | |
654 | + for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) { | |
655 | + if (pvma->vm_start > vma->vm_start) | |
656 | + break; | |
657 | + if (pvma->vm_start < vma->vm_start) | |
658 | + continue; | |
659 | + if (pvma->vm_end < vma->vm_end) | |
660 | + break; | |
661 | + } | |
662 | + | |
663 | + vma->vm_next = *pp; | |
664 | + *pp = vma; | |
565 | 665 | } |
566 | 666 | |
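The insertion above orders VMAs by three keys. A minimal userspace sketch of the same comparison, with the struct reduced to just the two addresses (vma_cmp is an illustrative helper, not part of this patch):

	#include <stdint.h>

	struct vma_key {
		uintptr_t vm_start;
		uintptr_t vm_end;
	};

	/* Order by start address, then end address, then by the struct's
	 * own address, so otherwise-identical VMAs still occupy distinct
	 * tree slots. */
	static int vma_cmp(const struct vma_key *a, const struct vma_key *b)
	{
		if (a->vm_start != b->vm_start)
			return a->vm_start < b->vm_start ? -1 : 1;
		if (a->vm_end != b->vm_end)
			return a->vm_end < b->vm_end ? -1 : 1;
		if (a != b)
			return a < b ? -1 : 1;
		return 0;	/* same object: the kernel BUG()s on this case */
	}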
567 | 667 | /* |
568 | - * delete a VMA from the global list | |
668 | + * delete a VMA from its owning mm_struct and address space | |
569 | 669 | */ |
570 | -static void delete_nommu_vma(struct vm_area_struct *vma) | |
670 | +static void delete_vma_from_mm(struct vm_area_struct *vma) | |
571 | 671 | { |
672 | + struct vm_area_struct **pp; | |
572 | 673 | struct address_space *mapping; |
674 | + struct mm_struct *mm = vma->vm_mm; | |
573 | 675 | |
676 | + kenter("%p", vma); | |
677 | + | |
678 | + mm->map_count--; | |
679 | + if (mm->mmap_cache == vma) | |
680 | + mm->mmap_cache = NULL; | |
681 | + | |
574 | 682 | /* remove the VMA from the mapping */ |
575 | 683 | if (vma->vm_file) { |
576 | 684 | mapping = vma->vm_file->f_mapping; |
577 | 685 | |
... | ... | @@ -580,11 +688,118 @@ |
580 | 688 | flush_dcache_mmap_unlock(mapping); |
581 | 689 | } |
582 | 690 | |
583 | - /* remove from the master list */ | |
584 | - rb_erase(&vma->vm_rb, &nommu_vma_tree); | |
691 | + /* remove from the MM's tree and list */ | |
692 | + rb_erase(&vma->vm_rb, &mm->mm_rb); | |
693 | + for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) { | |
694 | + if (*pp == vma) { | |
695 | + *pp = vma->vm_next; | |
696 | + break; | |
697 | + } | |
698 | + } | |
699 | + | |
700 | + vma->vm_mm = NULL; | |
585 | 701 | } |
586 | 702 | |
587 | 703 | /* |
704 | + * destroy a VMA record | |
705 | + */ | |
706 | +static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) | |
707 | +{ | |
708 | + kenter("%p", vma); | |
709 | + if (vma->vm_ops && vma->vm_ops->close) | |
710 | + vma->vm_ops->close(vma); | |
711 | + if (vma->vm_file) { | |
712 | + fput(vma->vm_file); | |
713 | + if (vma->vm_flags & VM_EXECUTABLE) | |
714 | + removed_exe_file_vma(mm); | |
715 | + } | |
716 | + put_nommu_region(vma->vm_region); | |
717 | + kmem_cache_free(vm_area_cachep, vma); | |
718 | +} | |
719 | + | |
720 | +/* | |
721 | + * look up the first VMA in which addr resides, NULL if none | |
722 | + * - should be called with mm->mmap_sem at least held readlocked | |
723 | + */ | |
724 | +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) | |
725 | +{ | |
726 | + struct vm_area_struct *vma; | |
727 | + struct rb_node *n = mm->mm_rb.rb_node; | |
728 | + | |
729 | + /* check the cache first */ | |
730 | + vma = mm->mmap_cache; | |
731 | + if (vma && vma->vm_start <= addr && vma->vm_end > addr) | |
732 | + return vma; | |
733 | + | |
734 | + /* trawl the tree (there may be multiple mappings in which addr | |
735 | + * resides) */ | |
736 | + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { | |
737 | + vma = rb_entry(n, struct vm_area_struct, vm_rb); | |
738 | + if (vma->vm_start > addr) | |
739 | + return NULL; | |
740 | + if (vma->vm_end > addr) { | |
741 | + mm->mmap_cache = vma; | |
742 | + return vma; | |
743 | + } | |
744 | + } | |
745 | + | |
746 | + return NULL; | |
747 | +} | |
748 | +EXPORT_SYMBOL(find_vma); | |
749 | + | |
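find_vma() treats each VMA as a half-open interval [vm_start, vm_end) and returns the lowest-addressed VMA containing addr. A hedged userspace sketch of the containment test, with a sorted array standing in for the in-order rb-tree walk (names are illustrative):

	#include <stddef.h>
	#include <stdint.h>

	struct ivl { uintptr_t start, end; };	/* covers [start, end) */

	/* Entries are assumed sorted by start address, mirroring the
	 * rb_first()/rb_next() in-order traversal of mm->mm_rb. */
	static const struct ivl *find_ivl(const struct ivl *v, size_t n,
					  uintptr_t addr)
	{
		for (size_t i = 0; i < n; i++) {
			if (v[i].start > addr)
				return NULL;	/* sorted: no later entry can match */
			if (v[i].end > addr)
				return &v[i];	/* start <= addr < end */
		}
		return NULL;
	}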
750 | +/* | |
751 | + * find a VMA | |
752 | + * - we don't extend stack VMAs under NOMMU conditions | |
753 | + */ | |
754 | +struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) | |
755 | +{ | |
756 | + return find_vma(mm, addr); | |
757 | +} | |
758 | + | |
759 | +/* | |
760 | + * expand a stack to a given address | |
761 | + * - not supported under NOMMU conditions | |
762 | + */ | |
763 | +int expand_stack(struct vm_area_struct *vma, unsigned long address) | |
764 | +{ | |
765 | + return -ENOMEM; | |
766 | +} | |
767 | + | |
768 | +/* | |
769 | + * look up the first VMA that exactly matches addr | |
770 | + * - should be called with mm->mmap_sem at least held readlocked | |
771 | + */ | |
772 | +static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, | |
773 | + unsigned long addr, | |
774 | + unsigned long len) | |
775 | +{ | |
776 | + struct vm_area_struct *vma; | |
777 | + struct rb_node *n = mm->mm_rb.rb_node; | |
778 | + unsigned long end = addr + len; | |
779 | + | |
780 | + /* check the cache first */ | |
781 | + vma = mm->mmap_cache; | |
782 | + if (vma && vma->vm_start == addr && vma->vm_end == end) | |
783 | + return vma; | |
784 | + | |
785 | + /* trawl the tree (there may be multiple mappings in which addr | |
786 | + * resides) */ | |
787 | + for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { | |
788 | + vma = rb_entry(n, struct vm_area_struct, vm_rb); | |
789 | + if (vma->vm_start < addr) | |
790 | + continue; | |
791 | + if (vma->vm_start > addr) | |
792 | + return NULL; | |
793 | + if (vma->vm_end == end) { | |
794 | + mm->mmap_cache = vma; | |
795 | + return vma; | |
796 | + } | |
797 | + } | |
798 | + | |
799 | + return NULL; | |
800 | +} | |
801 | + | |
802 | +/* | |
588 | 803 | * determine whether a mapping should be permitted and, if so, what sort of |
589 | 804 | * mapping we're capable of supporting |
590 | 805 | */ |
... | ... | @@ -596,7 +811,7 @@ |
596 | 811 | unsigned long pgoff, |
597 | 812 | unsigned long *_capabilities) |
598 | 813 | { |
599 | - unsigned long capabilities; | |
814 | + unsigned long capabilities, rlen; | |
600 | 815 | unsigned long reqprot = prot; |
601 | 816 | int ret; |
602 | 817 | |
603 | 818 | |
... | ... | @@ -616,12 +831,12 @@ |
616 | 831 | return -EINVAL; |
617 | 832 | |
618 | 833 | /* Careful about overflows.. */ |
619 | - len = PAGE_ALIGN(len); | |
620 | - if (!len || len > TASK_SIZE) | |
834 | + rlen = PAGE_ALIGN(len); | |
835 | + if (!rlen || rlen > TASK_SIZE) | |
621 | 836 | return -ENOMEM; |
622 | 837 | |
623 | 838 | /* offset overflow? */ |
624 | - if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) | |
839 | + if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) | |
625 | 840 | return -EOVERFLOW; |
626 | 841 | |
627 | 842 | if (file) { |
628 | 843 | |
629 | 844 | |
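The two checks above catch different wraparounds: PAGE_ALIGN() of a huge length can wrap to zero, and adding the page count to pgoff can wrap the page index. A standalone demonstration, assuming 4KiB pages (the macros are redefined locally for illustration):

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long rlen = PAGE_ALIGN(-1UL);	/* wraps to 0 */
		unsigned long pgoff = -1UL;		/* maximal page offset */
		unsigned long pages = PAGE_ALIGN(1) >> PAGE_SHIFT; /* 1 page */

		printf("rlen = %lu (rejected by the !rlen test)\n", rlen);
		printf("offset wraps: %d (rejected with -EOVERFLOW)\n",
		       (pgoff + pages) < pgoff);
		return 0;
	}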
... | ... | @@ -795,13 +1010,18 @@ |
795 | 1010 | } |
796 | 1011 | |
797 | 1012 | /* |
798 | - * set up a shared mapping on a file | |
1013 | + * set up a shared mapping on a file (the driver or filesystem provides and | |
1014 | + * pins the storage) | |
799 | 1015 | */ |
800 | -static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) | |
1016 | +static int do_mmap_shared_file(struct vm_area_struct *vma) | |
801 | 1017 | { |
802 | 1018 | int ret; |
803 | 1019 | |
804 | 1020 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
1021 | + if (ret == 0) { | |
1022 | + vma->vm_region->vm_top = vma->vm_region->vm_end; | |
1023 | + return ret; | |
1024 | + } | |
805 | 1025 | if (ret != -ENOSYS) |
806 | 1026 | return ret; |
807 | 1027 | |
808 | 1028 | |
809 | 1029 | |
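On success the region's vm_top is now recorded as well. Throughout this patch, vm_top marks the top of the pages actually allocated while vm_end marks the end of the part that is mapped, so for a directly-mapped shared file there is no slack and the two coincide. A reduced sketch of the bookkeeping (the real struct vm_region also carries the tree node, usage count, flags, file pointer and offset):

	struct vm_region_sketch {
		unsigned long vm_start;		/* start of the region */
		unsigned long vm_end;		/* end of the mapped part */
		unsigned long vm_top;		/* end of the allocated pages */
	};

	/* Invariant maintained by the patch: vm_start <= vm_end <= vm_top.
	 * do_mmap_shared_file() sets vm_top = vm_end (no slack); only
	 * do_mmap_private() can leave vm_top above vm_end. */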
... | ... | @@ -815,10 +1035,14 @@ |
815 | 1035 | /* |
816 | 1036 | * set up a private mapping or an anonymous shared mapping |
817 | 1037 | */ |
818 | -static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) | |
1038 | +static int do_mmap_private(struct vm_area_struct *vma, | |
1039 | + struct vm_region *region, | |
1040 | + unsigned long len) | |
819 | 1041 | { |
1042 | + struct page *pages; | |
1043 | + unsigned long total, point, n, rlen; | |
820 | 1044 | void *base; |
821 | - int ret; | |
1045 | + int ret, order; | |
822 | 1046 | |
823 | 1047 | /* invoke the file's mapping function so that it can keep track of |
824 | 1048 | * shared mappings on devices or memory |
825 | 1049 | |
826 | 1050 | |
827 | 1051 | |
828 | 1052 | |
829 | 1053 | |
830 | 1054 | |
831 | 1055 | |
... | ... | @@ -826,35 +1050,64 @@ |
826 | 1050 | */ |
827 | 1051 | if (vma->vm_file) { |
828 | 1052 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
829 | - if (ret != -ENOSYS) { | |
1053 | + if (ret == 0) { | |
830 | 1054 | /* shouldn't return success if we're not sharing */ |
831 | - BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); | |
832 | - return ret; /* success or a real error */ | |
1055 | + BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); | |
1056 | + vma->vm_region->vm_top = vma->vm_region->vm_end; | |
1057 | + return ret; | |
833 | 1058 | } |
1059 | + if (ret != -ENOSYS) | |
1060 | + return ret; | |
834 | 1061 | |
835 | 1062 | /* getting an ENOSYS error indicates that direct mmap isn't |
836 | 1063 | * possible (as opposed to tried but failed) so we'll try to |
837 | 1064 | * make a private copy of the data and map that instead */ |
838 | 1065 | } |
839 | 1066 | |
1067 | + rlen = PAGE_ALIGN(len); | |
1068 | + | |
840 | 1069 | /* allocate some memory to hold the mapping |
841 | 1070 | * - alloc_pages() always returns a page-aligned address, and allocates |
842 | 1071 | * a power-of-2 number of pages; any excess may be trimmed off below |
843 | 1072 | */ |
844 | - base = kmalloc(len, GFP_KERNEL|__GFP_COMP); | |
845 | - if (!base) | |
1073 | + order = get_order(rlen); | |
1074 | + kdebug("alloc order %d for %lx", order, len); | |
1075 | + | |
1076 | + pages = alloc_pages(GFP_KERNEL, order); | |
1077 | + if (!pages) | |
846 | 1078 | goto enomem; |
847 | 1079 | |
848 | - vma->vm_start = (unsigned long) base; | |
849 | - vma->vm_end = vma->vm_start + len; | |
850 | - vma->vm_flags |= VM_MAPPED_COPY; | |
1080 | + total = 1 << order; | |
1081 | + atomic_add(total, &mmap_pages_allocated); | |
851 | 1082 | |
852 | -#ifdef WARN_ON_SLACK | |
853 | - if (len + WARN_ON_SLACK <= kobjsize(result)) | |
854 | - printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", | |
855 | - len, current->pid, kobjsize(result) - len); | |
856 | -#endif | |
1083 | + point = rlen >> PAGE_SHIFT; | |
857 | 1084 | |
1085 | + /* we allocated a power-of-2 sized page set, so we may want to trim off | |
1086 | + * the excess */ | |
1087 | + if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) { | |
1088 | + while (total > point) { | |
1089 | + order = ilog2(total - point); | |
1090 | + n = 1 << order; | |
1091 | + kdebug("shave %lu/%lu @%lu", n, total - point, total); | |
1092 | + atomic_sub(n, &mmap_pages_allocated); | |
1093 | + total -= n; | |
1094 | + set_page_refcounted(pages + total); | |
1095 | + __free_pages(pages + total, order); | |
1096 | + } | |
1097 | + } | |
1098 | + | |
1099 | + for (point = 1; point < total; point++) | |
1100 | + set_page_refcounted(&pages[point]); | |
1101 | + | |
1102 | + base = page_address(pages); | |
1103 | + region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; | |
1104 | + region->vm_start = (unsigned long) base; | |
1105 | + region->vm_end = region->vm_start + rlen; | |
1106 | + region->vm_top = region->vm_start + (total << PAGE_SHIFT); | |
1107 | + | |
1108 | + vma->vm_start = region->vm_start; | |
1109 | + vma->vm_end = region->vm_start + len; | |
1110 | + | |
858 | 1111 | if (vma->vm_file) { |
859 | 1112 | /* read the contents of a file into the copy */ |
860 | 1113 | mm_segment_t old_fs; |
861 | 1114 | |
862 | 1115 | |
863 | 1116 | |
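The shave loop above peels the excess off a power-of-2 allocation one power-of-2 chunk at a time, from the top down. A userspace rerun of the arithmetic for a 5-page request satisfied by an 8-page (order-3) allocation, with the sysctl gate omitted:

	#include <stdio.h>

	static int ilog2u(unsigned long n)	/* floor(log2(n)), n > 0 */
	{
		int i = -1;

		while (n) {
			n >>= 1;
			i++;
		}
		return i;
	}

	int main(void)
	{
		unsigned long point = 5;	/* pages the mapping needs */
		unsigned long total = 8;	/* 1 << get_order(rlen) */

		while (total > point) {
			int order = ilog2u(total - point);
			unsigned long n = 1UL << order;

			total -= n;
			/* mirrors __free_pages(pages + total, order) */
			printf("free %lu page(s) at page offset %lu\n", n, total);
		}
		return 0;
	}

This prints "free 2 page(s) at page offset 6" then "free 1 page(s) at page offset 5", leaving exactly the 5 pages the mapping needs.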
... | ... | @@ -865,26 +1118,28 @@ |
865 | 1118 | |
866 | 1119 | old_fs = get_fs(); |
867 | 1120 | set_fs(KERNEL_DS); |
868 | - ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); | |
1121 | + ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos); | |
869 | 1122 | set_fs(old_fs); |
870 | 1123 | |
871 | 1124 | if (ret < 0) |
872 | 1125 | goto error_free; |
873 | 1126 | |
874 | 1127 | /* clear the last little bit */ |
875 | - if (ret < len) | |
876 | - memset(base + ret, 0, len - ret); | |
1128 | + if (ret < rlen) | |
1129 | + memset(base + ret, 0, rlen - ret); | |
877 | 1130 | |
878 | 1131 | } else { |
879 | 1132 | /* if it's an anonymous mapping, then just clear it */ |
880 | - memset(base, 0, len); | |
1133 | + memset(base, 0, rlen); | |
881 | 1134 | } |
882 | 1135 | |
883 | 1136 | return 0; |
884 | 1137 | |
885 | 1138 | error_free: |
886 | - kfree(base); | |
887 | - vma->vm_start = 0; | |
1139 | + free_page_series(region->vm_start, region->vm_end); | |
1140 | + region->vm_start = vma->vm_start = 0; | |
1141 | + region->vm_end = vma->vm_end = 0; | |
1142 | + region->vm_top = 0; | |
888 | 1143 | return ret; |
889 | 1144 | |
890 | 1145 | enomem: |
891 | 1146 | |
892 | 1147 | |
... | ... | @@ -904,13 +1159,14 @@ |
904 | 1159 | unsigned long flags, |
905 | 1160 | unsigned long pgoff) |
906 | 1161 | { |
907 | - struct vm_list_struct *vml = NULL; | |
908 | - struct vm_area_struct *vma = NULL; | |
1162 | + struct vm_area_struct *vma; | |
1163 | + struct vm_region *region; | |
909 | 1164 | struct rb_node *rb; |
910 | - unsigned long capabilities, vm_flags; | |
911 | - void *result; | |
1165 | + unsigned long capabilities, vm_flags, result; | |
912 | 1166 | int ret; |
913 | 1167 | |
1168 | + kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); | |
1169 | + | |
914 | 1170 | if (!(flags & MAP_FIXED)) |
915 | 1171 | addr = round_hint_to_min(addr); |
916 | 1172 | |
917 | 1173 | |
918 | 1174 | |
919 | 1175 | |
920 | 1176 | |
921 | 1177 | |
922 | 1178 | |
923 | 1179 | |
924 | 1180 | |
925 | 1181 | |
926 | 1182 | |
927 | 1183 | |
928 | 1184 | |
929 | 1185 | |
930 | 1186 | |
931 | 1187 | |
932 | 1188 | |
... | ... | @@ -918,73 +1174,120 @@ |
918 | 1174 | * mapping */ |
919 | 1175 | ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, |
920 | 1176 | &capabilities); |
921 | - if (ret < 0) | |
1177 | + if (ret < 0) { | |
1178 | + kleave(" = %d [val]", ret); | |
922 | 1179 | return ret; |
1180 | + } | |
923 | 1181 | |
924 | 1182 | /* we've determined that we can make the mapping, now translate what we |
925 | 1183 | * now know into VMA flags */ |
926 | 1184 | vm_flags = determine_vm_flags(file, prot, flags, capabilities); |
927 | 1185 | |
928 | - /* we're going to need to record the mapping if it works */ | |
929 | - vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); | |
930 | - if (!vml) | |
931 | - goto error_getting_vml; | |
1186 | + /* we're going to need to record the mapping */ | |
1187 | + region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); | |
1188 | + if (!region) | |
1189 | + goto error_getting_region; | |
932 | 1190 | |
933 | - down_write(&nommu_vma_sem); | |
1191 | + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); | |
1192 | + if (!vma) | |
1193 | + goto error_getting_vma; | |
934 | 1194 | |
935 | - /* if we want to share, we need to check for VMAs created by other | |
1195 | + atomic_set(®ion->vm_usage, 1); | |
1196 | + region->vm_flags = vm_flags; | |
1197 | + region->vm_pgoff = pgoff; | |
1198 | + | |
1199 | + INIT_LIST_HEAD(&vma->anon_vma_node); | |
1200 | + vma->vm_flags = vm_flags; | |
1201 | + vma->vm_pgoff = pgoff; | |
1202 | + | |
1203 | + if (file) { | |
1204 | + region->vm_file = file; | |
1205 | + get_file(file); | |
1206 | + vma->vm_file = file; | |
1207 | + get_file(file); | |
1208 | + if (vm_flags & VM_EXECUTABLE) { | |
1209 | + added_exe_file_vma(current->mm); | |
1210 | + vma->vm_mm = current->mm; | |
1211 | + } | |
1212 | + } | |
1213 | + | |
1214 | + down_write(&nommu_region_sem); | |
1215 | + | |
1216 | + /* if we want to share, we need to check for regions created by other | |
936 | 1217 | * mmap() calls that overlap with our proposed mapping |
937 | - * - we can only share with an exact match on most regular files | |
1218 | + * - we can only share with a superset match on most regular files | |
938 | 1219 | * - shared mappings on character devices and memory backed files are |
939 | 1220 | * permitted to overlap inexactly as far as we are concerned for in |
940 | 1221 | * these cases, sharing is handled in the driver or filesystem rather |
941 | 1222 | * than here |
942 | 1223 | */ |
943 | 1224 | if (vm_flags & VM_MAYSHARE) { |
944 | - unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
945 | - unsigned long vmpglen; | |
1225 | + struct vm_region *pregion; | |
1226 | + unsigned long pglen, rpglen, pgend, rpgend, start; | |
946 | 1227 | |
947 | - /* suppress VMA sharing for shared regions */ | |
948 | - if (vm_flags & VM_SHARED && | |
949 | - capabilities & BDI_CAP_MAP_DIRECT) | |
950 | - goto dont_share_VMAs; | |
1228 | + pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
1229 | + pgend = pgoff + pglen; | |
951 | 1230 | |
952 | - for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { | |
953 | - vma = rb_entry(rb, struct vm_area_struct, vm_rb); | |
1231 | + for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { | |
1232 | + pregion = rb_entry(rb, struct vm_region, vm_rb); | |
954 | 1233 | |
955 | - if (!(vma->vm_flags & VM_MAYSHARE)) | |
1234 | + if (!(pregion->vm_flags & VM_MAYSHARE)) | |
956 | 1235 | continue; |
957 | 1236 | |
958 | 1237 | /* search for overlapping mappings on the same file */ |
959 | - if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode) | |
1238 | + if (pregion->vm_file->f_path.dentry->d_inode != | |
1239 | + file->f_path.dentry->d_inode) | |
960 | 1240 | continue; |
961 | 1241 | |
962 | - if (vma->vm_pgoff >= pgoff + pglen) | |
1242 | + if (pregion->vm_pgoff >= pgend) | |
963 | 1243 | continue; |
964 | 1244 | |
965 | - vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1; | |
966 | - vmpglen >>= PAGE_SHIFT; | |
967 | - if (pgoff >= vma->vm_pgoff + vmpglen) | |
1245 | + rpglen = pregion->vm_end - pregion->vm_start; | |
1246 | + rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
1247 | + rpgend = pregion->vm_pgoff + rpglen; | |
1248 | + if (pgoff >= rpgend) | |
968 | 1249 | continue; |
969 | 1250 | |
970 | - /* handle inexactly overlapping matches between mappings */ | |
971 | - if (vma->vm_pgoff != pgoff || vmpglen != pglen) { | |
1251 | + /* handle inexactly overlapping matches between | |
1252 | + * mappings */ | |
1253 | + if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && | |
1254 | + !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { | |
1255 | + /* new mapping is not a subset of the region */ | |
972 | 1256 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) |
973 | 1257 | goto sharing_violation; |
974 | 1258 | continue; |
975 | 1259 | } |
976 | 1260 | |
977 | - /* we've found a VMA we can share */ | |
978 | - atomic_inc(&vma->vm_usage); | |
1261 | + /* we've found a region we can share */ | |
1262 | + atomic_inc(&pregion->vm_usage); | |
1263 | + vma->vm_region = pregion; | |
1264 | + start = pregion->vm_start; | |
1265 | + start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; | |
1266 | + vma->vm_start = start; | |
1267 | + vma->vm_end = start + len; | |
979 | 1268 | |
980 | - vml->vma = vma; | |
981 | - result = (void *) vma->vm_start; | |
982 | - goto shared; | |
1269 | + if (pregion->vm_flags & VM_MAPPED_COPY) { | |
1270 | + kdebug("share copy"); | |
1271 | + vma->vm_flags |= VM_MAPPED_COPY; | |
1272 | + } else { | |
1273 | + kdebug("share mmap"); | |
1274 | + ret = do_mmap_shared_file(vma); | |
1275 | + if (ret < 0) { | |
1276 | + vma->vm_region = NULL; | |
1277 | + vma->vm_start = 0; | |
1278 | + vma->vm_end = 0; | |
1279 | + atomic_dec(&pregion->vm_usage); | |
1280 | + pregion = NULL; | |
1281 | + goto error_just_free; | |
1282 | + } | |
1283 | + } | |
1284 | + fput(region->vm_file); | |
1285 | + kmem_cache_free(vm_region_jar, region); | |
1286 | + region = pregion; | |
1287 | + result = start; | |
1288 | + goto share; | |
983 | 1289 | } |
984 | 1290 | |
985 | - dont_share_VMAs: | |
986 | - vma = NULL; | |
987 | - | |
988 | 1291 | /* obtain the address at which to make a shared mapping |
989 | 1292 | * - this is the hook for quasi-memory character devices to |
990 | 1293 | * tell us the location of a shared mapping |
991 | 1294 | |
992 | 1295 | |
993 | 1296 | |
994 | 1297 | |
995 | 1298 | |
996 | 1299 | |
997 | 1300 | |
998 | 1301 | |
999 | 1302 | |
1000 | 1303 | |
1001 | 1304 | |
1002 | 1305 | |
1003 | 1306 | |
1004 | 1307 | |
1005 | 1308 | |
1006 | 1309 | |
1007 | 1310 | |
1008 | 1311 | |
1009 | 1312 | |
1010 | 1313 | |
1011 | 1314 | |
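Sharing was previously an exact-match test; the loop above now also accepts any request whose page range is a subset of an existing region. The test can be restated as a predicate; a hedged standalone version with two worked cases (can_share is illustrative, not a kernel function):

	#include <stdio.h>

	/* Can a request for pglen pages at file offset pgoff reuse a region
	 * of rpglen pages at rpgoff?  Mirrors the patch: exact match or
	 * subset. */
	int can_share(unsigned long pgoff, unsigned long pglen,
		      unsigned long rpgoff, unsigned long rpglen)
	{
		unsigned long pgend = pgoff + pglen;
		unsigned long rpgend = rpgoff + rpglen;

		if (pgoff == rpgoff && pglen == rpglen)
			return 1;				/* exact match */
		return pgoff >= rpgoff && pgend <= rpgend;	/* subset */
	}

	int main(void)
	{
		/* region covers pages 4..12; pages 6..9 are a subset */
		printf("%d\n", can_share(6, 3, 4, 8));	/* prints 1 */
		/* pages 10..14 straddle the region's end: not shareable */
		printf("%d\n", can_share(10, 4, 4, 8));	/* prints 0 */
		return 0;
	}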
... | ... | @@ -995,113 +1298,93 @@ |
995 | 1298 | if (IS_ERR((void *) addr)) { |
996 | 1299 | ret = addr; |
997 | 1300 | if (ret != (unsigned long) -ENOSYS) |
998 | - goto error; | |
1301 | + goto error_just_free; | |
999 | 1302 | |
1000 | 1303 | /* the driver refused to tell us where to site |
1001 | 1304 | * the mapping so we'll have to attempt to copy |
1002 | 1305 | * it */ |
1003 | 1306 | ret = (unsigned long) -ENODEV; |
1004 | 1307 | if (!(capabilities & BDI_CAP_MAP_COPY)) |
1005 | - goto error; | |
1308 | + goto error_just_free; | |
1006 | 1309 | |
1007 | 1310 | capabilities &= ~BDI_CAP_MAP_DIRECT; |
1311 | + } else { | |
1312 | + vma->vm_start = region->vm_start = addr; | |
1313 | + vma->vm_end = region->vm_end = addr + len; | |
1008 | 1314 | } |
1009 | 1315 | } |
1010 | 1316 | } |
1011 | 1317 | |
1012 | - /* we're going to need a VMA struct as well */ | |
1013 | - vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); | |
1014 | - if (!vma) | |
1015 | - goto error_getting_vma; | |
1318 | + vma->vm_region = region; | |
1016 | 1319 | |
1017 | - INIT_LIST_HEAD(&vma->anon_vma_node); | |
1018 | - atomic_set(&vma->vm_usage, 1); | |
1019 | - if (file) { | |
1020 | - get_file(file); | |
1021 | - if (vm_flags & VM_EXECUTABLE) { | |
1022 | - added_exe_file_vma(current->mm); | |
1023 | - vma->vm_mm = current->mm; | |
1024 | - } | |
1025 | - } | |
1026 | - vma->vm_file = file; | |
1027 | - vma->vm_flags = vm_flags; | |
1028 | - vma->vm_start = addr; | |
1029 | - vma->vm_end = addr + len; | |
1030 | - vma->vm_pgoff = pgoff; | |
1031 | - | |
1032 | - vml->vma = vma; | |
1033 | - | |
1034 | 1320 | /* set up the mapping */ |
1035 | 1321 | if (file && vma->vm_flags & VM_SHARED) |
1036 | - ret = do_mmap_shared_file(vma, len); | |
1322 | + ret = do_mmap_shared_file(vma); | |
1037 | 1323 | else |
1038 | - ret = do_mmap_private(vma, len); | |
1324 | + ret = do_mmap_private(vma, region, len); | |
1039 | 1325 | if (ret < 0) |
1040 | - goto error; | |
1326 | + goto error_put_region; | |
1041 | 1327 | |
1328 | + add_nommu_region(region); | |
1329 | + | |
1042 | 1330 | /* okay... we have a mapping; now we have to register it */ |
1043 | - result = (void *) vma->vm_start; | |
1331 | + result = vma->vm_start; | |
1044 | 1332 | |
1045 | - if (vma->vm_flags & VM_MAPPED_COPY) { | |
1046 | - realalloc += kobjsize(result); | |
1047 | - askedalloc += len; | |
1048 | - } | |
1049 | - | |
1050 | - realalloc += kobjsize(vma); | |
1051 | - askedalloc += sizeof(*vma); | |
1052 | - | |
1053 | 1333 | current->mm->total_vm += len >> PAGE_SHIFT; |
1054 | 1334 | |
1055 | - add_nommu_vma(vma); | |
1335 | +share: | |
1336 | + add_vma_to_mm(current->mm, vma); | |
1056 | 1337 | |
1057 | - shared: | |
1058 | - realalloc += kobjsize(vml); | |
1059 | - askedalloc += sizeof(*vml); | |
1338 | + up_write(&nommu_region_sem); | |
1060 | 1339 | |
1061 | - add_vma_to_mm(current->mm, vml); | |
1062 | - | |
1063 | - up_write(&nommu_vma_sem); | |
1064 | - | |
1065 | 1340 | if (prot & PROT_EXEC) |
1066 | - flush_icache_range((unsigned long) result, | |
1067 | - (unsigned long) result + len); | |
1341 | + flush_icache_range(result, result + len); | |
1068 | 1342 | |
1069 | -#ifdef DEBUG | |
1070 | - printk("do_mmap:\n"); | |
1071 | - show_process_blocks(); | |
1072 | -#endif | |
1343 | + kleave(" = %lx", result); | |
1344 | + return result; | |
1073 | 1345 | |
1074 | - return (unsigned long) result; | |
1075 | - | |
1076 | - error: | |
1077 | - up_write(&nommu_vma_sem); | |
1078 | - kfree(vml); | |
1346 | +error_put_region: | |
1347 | + __put_nommu_region(region); | |
1079 | 1348 | if (vma) { |
1080 | 1349 | if (vma->vm_file) { |
1081 | 1350 | fput(vma->vm_file); |
1082 | 1351 | if (vma->vm_flags & VM_EXECUTABLE) |
1083 | 1352 | removed_exe_file_vma(vma->vm_mm); |
1084 | 1353 | } |
1085 | - kfree(vma); | |
1354 | + kmem_cache_free(vm_area_cachep, vma); | |
1086 | 1355 | } |
1356 | + kleave(" = %d [pr]", ret); | |
1087 | 1357 | return ret; |
1088 | 1358 | |
1089 | - sharing_violation: | |
1090 | - up_write(&nommu_vma_sem); | |
1091 | - printk("Attempt to share mismatched mappings\n"); | |
1092 | - kfree(vml); | |
1093 | - return -EINVAL; | |
1359 | +error_just_free: | |
1360 | + up_write(&nommu_region_sem); | |
1361 | +error: | |
1362 | + fput(region->vm_file); | |
1363 | + kmem_cache_free(vm_region_jar, region); | |
1364 | + fput(vma->vm_file); | |
1365 | + if (vma->vm_flags & VM_EXECUTABLE) | |
1366 | + removed_exe_file_vma(vma->vm_mm); | |
1367 | + kmem_cache_free(vm_area_cachep, vma); | |
1368 | + kleave(" = %d", ret); | |
1369 | + return ret; | |
1094 | 1370 | |
1095 | - error_getting_vma: | |
1096 | - up_write(&nommu_vma_sem); | |
1097 | - kfree(vml); | |
1098 | - printk("Allocation of vma for %lu byte allocation from process %d failed\n", | |
1371 | +sharing_violation: | |
1372 | + up_write(&nommu_region_sem); | |
1373 | + printk(KERN_WARNING "Attempt to share mismatched mappings\n"); | |
1374 | + ret = -EINVAL; | |
1375 | + goto error; | |
1376 | + | |
1377 | +error_getting_vma: | |
1378 | + kmem_cache_free(vm_region_jar, region); | |
1379 | + printk(KERN_WARNING "Allocation of vma for %lu byte allocation" | |
1380 | + " from process %d failed\n", | |
1099 | 1381 | len, current->pid); |
1100 | 1382 | show_free_areas(); |
1101 | 1383 | return -ENOMEM; |
1102 | 1384 | |
1103 | - error_getting_vml: | |
1104 | - printk("Allocation of vml for %lu byte allocation from process %d failed\n", | |
1385 | +error_getting_region: | |
1386 | + printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" | |
1387 | + " from process %d failed\n", | |
1105 | 1388 | len, current->pid); |
1106 | 1389 | show_free_areas(); |
1107 | 1390 | return -ENOMEM; |
1108 | 1391 | |
1109 | 1392 | |
1110 | 1393 | |
1111 | 1394 | |
1112 | 1395 | |
1113 | 1396 | |
1114 | 1397 | |
1115 | 1398 | |
1116 | 1399 | |
1117 | 1400 | |
1118 | 1401 | |
1119 | 1402 | |
1120 | 1403 | |
1121 | 1404 | |
1122 | 1405 | |
1123 | 1406 | |
1124 | 1407 | |
1125 | 1408 | |
1126 | 1409 | |
1127 | 1410 | |
1128 | 1411 | |
1129 | 1412 | |
... | ... | @@ -1109,85 +1392,183 @@ |
1109 | 1392 | EXPORT_SYMBOL(do_mmap_pgoff); |
1110 | 1393 | |
1111 | 1394 | /* |
1112 | - * handle mapping disposal for uClinux | |
1395 | + * split a vma into two pieces at address 'addr'; a new vma is allocated for | |
1396 | + * either the first part or the tail. | |
1113 | 1397 | */ |
1114 | -static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma) | |
1398 | +int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, | |
1399 | + unsigned long addr, int new_below) | |
1115 | 1400 | { |
1116 | - if (vma) { | |
1117 | - down_write(&nommu_vma_sem); | |
1401 | + struct vm_area_struct *new; | |
1402 | + struct vm_region *region; | |
1403 | + unsigned long npages; | |
1118 | 1404 | |
1119 | - if (atomic_dec_and_test(&vma->vm_usage)) { | |
1120 | - delete_nommu_vma(vma); | |
1405 | + kenter(""); | |
1121 | 1406 | |
1122 | - if (vma->vm_ops && vma->vm_ops->close) | |
1123 | - vma->vm_ops->close(vma); | |
1407 | + /* we're only permitted to split anonymous regions that have a single | |
1408 | + * owner */ | |
1409 | + if (vma->vm_file || | |
1410 | + atomic_read(&vma->vm_region->vm_usage) != 1) | |
1411 | + return -ENOMEM; | |
1124 | 1412 | |
1125 | - /* IO memory and memory shared directly out of the pagecache from | |
1126 | - * ramfs/tmpfs mustn't be released here */ | |
1127 | - if (vma->vm_flags & VM_MAPPED_COPY) { | |
1128 | - realalloc -= kobjsize((void *) vma->vm_start); | |
1129 | - askedalloc -= vma->vm_end - vma->vm_start; | |
1130 | - kfree((void *) vma->vm_start); | |
1131 | - } | |
1413 | + if (mm->map_count >= sysctl_max_map_count) | |
1414 | + return -ENOMEM; | |
1132 | 1415 | |
1133 | - realalloc -= kobjsize(vma); | |
1134 | - askedalloc -= sizeof(*vma); | |
1416 | + region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); | |
1417 | + if (!region) | |
1418 | + return -ENOMEM; | |
1135 | 1419 | |
1136 | - if (vma->vm_file) { | |
1137 | - fput(vma->vm_file); | |
1138 | - if (vma->vm_flags & VM_EXECUTABLE) | |
1139 | - removed_exe_file_vma(mm); | |
1140 | - } | |
1141 | - kfree(vma); | |
1142 | - } | |
1420 | + new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); | |
1421 | + if (!new) { | |
1422 | + kmem_cache_free(vm_region_jar, region); | |
1423 | + return -ENOMEM; | |
1424 | + } | |
1143 | 1425 | |
1144 | - up_write(&nommu_vma_sem); | |
1426 | + /* most fields are the same, copy all, and then fixup */ | |
1427 | + *new = *vma; | |
1428 | + *region = *vma->vm_region; | |
1429 | + new->vm_region = region; | |
1430 | + | |
1431 | + npages = (addr - vma->vm_start) >> PAGE_SHIFT; | |
1432 | + | |
1433 | + if (new_below) { | |
1434 | + region->vm_top = region->vm_end = new->vm_end = addr; | |
1435 | + } else { | |
1436 | + region->vm_start = new->vm_start = addr; | |
1437 | + region->vm_pgoff = new->vm_pgoff += npages; | |
1145 | 1438 | } |
1439 | + | |
1440 | + if (new->vm_ops && new->vm_ops->open) | |
1441 | + new->vm_ops->open(new); | |
1442 | + | |
1443 | + delete_vma_from_mm(vma); | |
1444 | + down_write(&nommu_region_sem); | |
1445 | + delete_nommu_region(vma->vm_region); | |
1446 | + if (new_below) { | |
1447 | + vma->vm_region->vm_start = vma->vm_start = addr; | |
1448 | + vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; | |
1449 | + } else { | |
1450 | + vma->vm_region->vm_end = vma->vm_end = addr; | |
1451 | + vma->vm_region->vm_top = addr; | |
1452 | + } | |
1453 | + add_nommu_region(vma->vm_region); | |
1454 | + add_nommu_region(new->vm_region); | |
1455 | + up_write(&nommu_region_sem); | |
1456 | + add_vma_to_mm(mm, vma); | |
1457 | + add_vma_to_mm(mm, new); | |
1458 | + return 0; | |
1146 | 1459 | } |
1147 | 1460 | |
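The pointer and offset fixups in split_vma() are easiest to follow with concrete numbers. A small sketch, assuming 4KiB pages and new_below == 0 (the new VMA takes the tail); the addresses are made up:

	#include <stdio.h>

	int main(void)
	{
		unsigned long start = 0x100000, end = 0x104000, pgoff = 0;
		unsigned long addr = 0x102000;			/* split point */
		unsigned long npages = (addr - start) >> 12;	/* = 2 */

		/* old VMA keeps the head; vm_top is pulled down with vm_end */
		printf("head: %#lx-%#lx pgoff %lu\n", start, addr, pgoff);
		/* new VMA and region take the tail, with pgoff advanced */
		printf("tail: %#lx-%#lx pgoff %lu\n", addr, end, pgoff + npages);
		return 0;
	}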
1148 | 1461 | /* |
1149 | - * release a mapping | |
1150 | - * - under NOMMU conditions the parameters must match exactly to the mapping to | |
1151 | - * be removed | |
1462 | + * shrink a VMA by removing the specified chunk from either the beginning or | |
1463 | + * the end | |
1152 | 1464 | */ |
1153 | -int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) | |
1465 | +static int shrink_vma(struct mm_struct *mm, | |
1466 | + struct vm_area_struct *vma, | |
1467 | + unsigned long from, unsigned long to) | |
1154 | 1468 | { |
1155 | - struct vm_list_struct *vml, **parent; | |
1156 | - unsigned long end = addr + len; | |
1469 | + struct vm_region *region; | |
1157 | 1470 | |
1158 | -#ifdef DEBUG | |
1159 | - printk("do_munmap:\n"); | |
1160 | -#endif | |
1471 | + kenter(""); | |
1161 | 1472 | |
1162 | - for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { | |
1163 | - if ((*parent)->vma->vm_start > addr) | |
1164 | - break; | |
1165 | - if ((*parent)->vma->vm_start == addr && | |
1166 | - ((len == 0) || ((*parent)->vma->vm_end == end))) | |
1167 | - goto found; | |
1473 | + /* adjust the VMA's pointers, which may reposition it in the MM's tree | |
1474 | + * and list */ | |
1475 | + delete_vma_from_mm(vma); | |
1476 | + if (from > vma->vm_start) | |
1477 | + vma->vm_end = from; | |
1478 | + else | |
1479 | + vma->vm_start = to; | |
1480 | + add_vma_to_mm(mm, vma); | |
1481 | + | |
1482 | + /* cut the backing region down to size */ | |
1483 | + region = vma->vm_region; | |
1484 | + BUG_ON(atomic_read(®ion->vm_usage) != 1); | |
1485 | + | |
1486 | + down_write(&nommu_region_sem); | |
1487 | + delete_nommu_region(region); | |
1488 | + if (from > region->vm_start) { | |
1489 | + to = region->vm_top; | |
1490 | + region->vm_top = region->vm_end = from; | |
1491 | + } else { | |
1492 | + region->vm_start = to; | |
1168 | 1493 | } |
1494 | + add_nommu_region(region); | |
1495 | + up_write(&nommu_region_sem); | |
1169 | 1496 | |
1170 | - printk("munmap of non-mmaped memory by process %d (%s): %p\n", | |
1171 | - current->pid, current->comm, (void *) addr); | |
1172 | - return -EINVAL; | |
1497 | + free_page_series(from, to); | |
1498 | + return 0; | |
1499 | +} | |
1173 | 1500 | |
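Note the asymmetry in shrink_vma(): when the tail of a region is cut off, the free runs up to the old vm_top, so any allocation slack beyond vm_end is handed back along with the unmapped chunk; a cut at the front frees only the requested range. A sketch with one page of slack (the numbers are made up):

	#include <stdio.h>

	int main(void)
	{
		/* region maps [0x100000, 0x103000); vm_top 0x104000 */
		unsigned long vm_start = 0x100000, vm_end = 0x103000;
		unsigned long vm_top = 0x104000;
		unsigned long from = 0x102000, to = vm_end;  /* unmap the tail */

		if (from > vm_start) {		/* cutting the end off */
			to = vm_top;		/* the slack is freed too */
			vm_top = vm_end = from;
		} else {
			vm_start = to;		/* cutting the front off */
		}
		printf("free [%#lx, %#lx), region now [%#lx, %#lx) top %#lx\n",
		       from, to, vm_start, vm_end, vm_top);
		return 0;
	}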
1174 | - found: | |
1175 | - vml = *parent; | |
1501 | +/* | |
1502 | + * release a mapping | |
1503 | + * - under NOMMU conditions the chunk to be unmapped must be backed by a single | |
1504 | + * VMA, though it need not cover the whole VMA | |
1505 | + */ | |
1506 | +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) | |
1507 | +{ | |
1508 | + struct vm_area_struct *vma; | |
1509 | + struct rb_node *rb; | |
1510 | + unsigned long end = start + len; | |
1511 | + int ret; | |
1176 | 1512 | |
1177 | - put_vma(mm, vml->vma); | |
1513 | + kenter(",%lx,%zx", start, len); | |
1178 | 1514 | |
1179 | - *parent = vml->next; | |
1180 | - realalloc -= kobjsize(vml); | |
1181 | - askedalloc -= sizeof(*vml); | |
1182 | - kfree(vml); | |
1515 | + if (len == 0) | |
1516 | + return -EINVAL; | |
1183 | 1517 | |
1184 | - update_hiwater_vm(mm); | |
1185 | - mm->total_vm -= len >> PAGE_SHIFT; | |
1518 | + /* find the first potentially overlapping VMA */ | |
1519 | + vma = find_vma(mm, start); | |
1520 | + if (!vma) { | |
1521 | + printk(KERN_WARNING | |
1522 | + "munmap of memory not mmapped by process %d (%s):" | |
1523 | + " 0x%lx-0x%lx\n", | |
1524 | + current->pid, current->comm, start, start + len - 1); | |
1525 | + return -EINVAL; | |
1526 | + } | |
1186 | 1527 | |
1187 | -#ifdef DEBUG | |
1188 | - show_process_blocks(); | |
1189 | -#endif | |
1528 | + /* we're allowed to split an anonymous VMA but not a file-backed one */ | |
1529 | + if (vma->vm_file) { | |
1530 | + do { | |
1531 | + if (start > vma->vm_start) { | |
1532 | + kleave(" = -EINVAL [miss]"); | |
1533 | + return -EINVAL; | |
1534 | + } | |
1535 | + if (end == vma->vm_end) | |
1536 | + goto erase_whole_vma; | |
1537 | + rb = rb_next(&vma->vm_rb); | |
1538 | + vma = rb_entry(rb, struct vm_area_struct, vm_rb); | |
1539 | + } while (rb); | |
1540 | + kleave(" = -EINVAL [split file]"); | |
1541 | + return -EINVAL; | |
1542 | + } else { | |
1543 | + /* the chunk must be a subset of the VMA found */ | |
1544 | + if (start == vma->vm_start && end == vma->vm_end) | |
1545 | + goto erase_whole_vma; | |
1546 | + if (start < vma->vm_start || end > vma->vm_end) { | |
1547 | + kleave(" = -EINVAL [superset]"); | |
1548 | + return -EINVAL; | |
1549 | + } | |
1550 | + if (start & ~PAGE_MASK) { | |
1551 | + kleave(" = -EINVAL [unaligned start]"); | |
1552 | + return -EINVAL; | |
1553 | + } | |
1554 | + if (end != vma->vm_end && end & ~PAGE_MASK) { | |
1555 | + kleave(" = -EINVAL [unaligned split]"); | |
1556 | + return -EINVAL; | |
1557 | + } | |
1558 | + if (start != vma->vm_start && end != vma->vm_end) { | |
1559 | + ret = split_vma(mm, vma, start, 1); | |
1560 | + if (ret < 0) { | |
1561 | + kleave(" = %d [split]", ret); | |
1562 | + return ret; | |
1563 | + } | |
1564 | + } | |
1565 | + return shrink_vma(mm, vma, start, end); | |
1566 | + } | |
1190 | 1567 | |
1568 | +erase_whole_vma: | |
1569 | + delete_vma_from_mm(vma); | |
1570 | + delete_vma(mm, vma); | |
1571 | + kleave(" = 0"); | |
1191 | 1572 | return 0; |
1192 | 1573 | } |
1193 | 1574 | EXPORT_SYMBOL(do_munmap); |
1194 | 1575 | |
1195 | 1576 | |
1196 | 1577 | |
1197 | 1578 | |
1198 | 1579 | |
1199 | 1580 | |
1200 | 1581 | |
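Taken together, do_munmap() now accepts three shapes of request: the whole of a VMA, a page-aligned chunk at either end of an anonymous VMA (shrink), or a page-aligned chunk in the middle (split, then shrink); file-backed VMAs can only be unmapped whole. A condensed restatement as a sketch, assuming 4KiB pages and that [start, end) already falls within a single VMA:

	enum munmap_action { WHOLE, SHRINK, SPLIT_THEN_SHRINK, REJECT };

	enum munmap_action classify(unsigned long start, unsigned long end,
				    unsigned long vm_start, unsigned long vm_end,
				    int file_backed)
	{
		if (start == vm_start && end == vm_end)
			return WHOLE;			/* erase_whole_vma */
		if (file_backed)
			return REJECT;			/* files aren't split */
		if (start < vm_start || end > vm_end)
			return REJECT;			/* not a subset */
		if (start & 0xfff || (end != vm_end && end & 0xfff))
			return REJECT;			/* unaligned edge */
		if (start != vm_start && end != vm_end)
			return SPLIT_THEN_SHRINK;	/* middle chunk */
		return SHRINK;				/* chunk at one end */
	}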
... | ... | @@ -1204,32 +1585,26 @@ |
1204 | 1585 | } |
1205 | 1586 | |
1206 | 1587 | /* |
1207 | - * Release all mappings | |
1588 | + * release all the mappings made in a process's VM space | |
1208 | 1589 | */ |
1209 | -void exit_mmap(struct mm_struct * mm) | |
1590 | +void exit_mmap(struct mm_struct *mm) | |
1210 | 1591 | { |
1211 | - struct vm_list_struct *tmp; | |
1592 | + struct vm_area_struct *vma; | |
1212 | 1593 | |
1213 | - if (mm) { | |
1214 | -#ifdef DEBUG | |
1215 | - printk("Exit_mmap:\n"); | |
1216 | -#endif | |
1594 | + if (!mm) | |
1595 | + return; | |
1217 | 1596 | |
1218 | - mm->total_vm = 0; | |
1597 | + kenter(""); | |
1219 | 1598 | |
1220 | - while ((tmp = mm->context.vmlist)) { | |
1221 | - mm->context.vmlist = tmp->next; | |
1222 | - put_vma(mm, tmp->vma); | |
1599 | + mm->total_vm = 0; | |
1223 | 1600 | |
1224 | - realalloc -= kobjsize(tmp); | |
1225 | - askedalloc -= sizeof(*tmp); | |
1226 | - kfree(tmp); | |
1227 | - } | |
1228 | - | |
1229 | -#ifdef DEBUG | |
1230 | - show_process_blocks(); | |
1231 | -#endif | |
1601 | + while ((vma = mm->mmap)) { | |
1602 | + mm->mmap = vma->vm_next; | |
1603 | + delete_vma_from_mm(vma); | |
1604 | + delete_vma(mm, vma); | |
1232 | 1605 | } |
1606 | + | |
1607 | + kleave(""); | |
1233 | 1608 | } |
1234 | 1609 | |
1235 | 1610 | unsigned long do_brk(unsigned long addr, unsigned long len) |
... | ... | @@ -1242,8 +1617,8 @@ |
1242 | 1617 | * time (controlled by the MREMAP_MAYMOVE flag and available VM space) |
1243 | 1618 | * |
1244 | 1619 | * under NOMMU conditions, we only permit changing a mapping's size, and only |
1245 | - * as long as it stays within the hole allocated by the kmalloc() call in | |
1246 | - * do_mmap_pgoff() and the block is not shareable | |
1620 | + * as long as it stays within the region allocated by do_mmap_private() and the | |
1621 | + * block is not shareable | |
1247 | 1622 | * |
1248 | 1623 | * MREMAP_FIXED is not supported under NOMMU conditions |
1249 | 1624 | */ |
1250 | 1625 | |
1251 | 1626 | |
... | ... | @@ -1254,13 +1629,16 @@ |
1254 | 1629 | struct vm_area_struct *vma; |
1255 | 1630 | |
1256 | 1631 | /* insanity checks first */ |
1257 | - if (new_len == 0) | |
1632 | + if (old_len == 0 || new_len == 0) | |
1258 | 1633 | return (unsigned long) -EINVAL; |
1259 | 1634 | |
1635 | + if (addr & ~PAGE_MASK) | |
1636 | + return -EINVAL; | |
1637 | + | |
1260 | 1638 | if (flags & MREMAP_FIXED && new_addr != addr) |
1261 | 1639 | return (unsigned long) -EINVAL; |
1262 | 1640 | |
1263 | - vma = find_vma_exact(current->mm, addr); | |
1641 | + vma = find_vma_exact(current->mm, addr, old_len); | |
1264 | 1642 | if (!vma) |
1265 | 1643 | return (unsigned long) -EINVAL; |
1266 | 1644 | |
1267 | 1645 | |
1268 | 1646 | |
... | ... | @@ -1270,22 +1648,19 @@ |
1270 | 1648 | if (vma->vm_flags & VM_MAYSHARE) |
1271 | 1649 | return (unsigned long) -EPERM; |
1272 | 1650 | |
1273 | - if (new_len > kobjsize((void *) addr)) | |
1651 | + if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) | |
1274 | 1652 | return (unsigned long) -ENOMEM; |
1275 | 1653 | |
1276 | 1654 | /* all checks complete - do it */ |
1277 | 1655 | vma->vm_end = vma->vm_start + new_len; |
1278 | - | |
1279 | - askedalloc -= old_len; | |
1280 | - askedalloc += new_len; | |
1281 | - | |
1282 | 1656 | return vma->vm_start; |
1283 | 1657 | } |
1284 | 1658 | EXPORT_SYMBOL(do_mremap); |
1285 | 1659 | |
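The resize limit is now the region's extent rather than kobjsize(), i.e. new_len may not exceed vm_end - vm_start. So a 3000-byte private mapping, which occupies a one-page region, can be grown to 4096 bytes in place, while a two-page request is refused. A trivial check of the arithmetic (assumes 4KiB pages):

	#include <stdio.h>

	int main(void)
	{
		unsigned long region_size = 4096;	/* vm_end - vm_start */

		/* growth within the page-rounded region resizes in place */
		printf("%s\n", 4096 > region_size ? "-ENOMEM" : "resized in place");
		/* anything beyond the region is refused */
		printf("%s\n", 8192 > region_size ? "-ENOMEM" : "resized in place");
		return 0;
	}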
1286 | -asmlinkage unsigned long sys_mremap(unsigned long addr, | |
1287 | - unsigned long old_len, unsigned long new_len, | |
1288 | - unsigned long flags, unsigned long new_addr) | |
1660 | +asmlinkage | |
1661 | +unsigned long sys_mremap(unsigned long addr, | |
1662 | + unsigned long old_len, unsigned long new_len, | |
1663 | + unsigned long flags, unsigned long new_addr) | |
1289 | 1664 | { |
1290 | 1665 | unsigned long ret; |
1291 | 1666 |