Commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
Committed by: Linus Torvalds
Parent: 7bbc0905ea
Exists in: master and 20 other branches

mm: add vzalloc() and vzalloc_node() helpers
Add vzalloc() and vzalloc_node() to encapsulate the vmalloc-then-memset-zero
operation. Use __GFP_ZERO to zero fill the allocated memory.

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 94 additions and 3 deletions
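For orientation before the diff: the new helpers fold the common vmalloc-then-memset-zero idiom into a single call. A minimal, hedged usage sketch follows; the wrapper names are illustrative and not part of the patch.

```c
#include <linux/vmalloc.h>
#include <linux/string.h>

/* Illustrative wrappers only -- none of these functions are from the patch. */

/* Old idiom this commit encapsulates: vmalloc() followed by memset(). */
static void *zalloc_old(unsigned long len)
{
	void *p = vmalloc(len);

	if (p)
		memset(p, 0, len);
	return p;
}

/* New idiom: one call, zero fill handled via __GFP_ZERO internally. */
static void *zalloc_new(unsigned long len)
{
	return vzalloc(len);
}

/* NUMA-aware variant: zeroed memory with a preferred node. */
static void *zalloc_on_node(unsigned long len, int node)
{
	return vzalloc_node(len, node);
}
```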
include/linux/vmalloc.h
1 | #ifndef _LINUX_VMALLOC_H | 1 | #ifndef _LINUX_VMALLOC_H |
2 | #define _LINUX_VMALLOC_H | 2 | #define _LINUX_VMALLOC_H |
3 | 3 | ||
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/init.h> | 5 | #include <linux/init.h> |
6 | #include <asm/page.h> /* pgprot_t */ | 6 | #include <asm/page.h> /* pgprot_t */ |
7 | 7 | ||
8 | struct vm_area_struct; /* vma defining user mapping in mm_types.h */ | 8 | struct vm_area_struct; /* vma defining user mapping in mm_types.h */ |
9 | 9 | ||
10 | extern bool vmap_lazy_unmap; | 10 | extern bool vmap_lazy_unmap; |
11 | 11 | ||
12 | /* bits in flags of vmalloc's vm_struct below */ | 12 | /* bits in flags of vmalloc's vm_struct below */ |
13 | #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ | 13 | #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ |
14 | #define VM_ALLOC 0x00000002 /* vmalloc() */ | 14 | #define VM_ALLOC 0x00000002 /* vmalloc() */ |
15 | #define VM_MAP 0x00000004 /* vmap()ed pages */ | 15 | #define VM_MAP 0x00000004 /* vmap()ed pages */ |
16 | #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ | 16 | #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ |
17 | #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ | 17 | #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ |
18 | /* bits [20..32] reserved for arch specific ioremap internals */ | 18 | /* bits [20..32] reserved for arch specific ioremap internals */ |
19 | 19 | ||
20 | /* | 20 | /* |
21 | * Maximum alignment for ioremap() regions. | 21 | * Maximum alignment for ioremap() regions. |
22 | * Can be overriden by arch-specific value. | 22 | * Can be overriden by arch-specific value. |
23 | */ | 23 | */ |
24 | #ifndef IOREMAP_MAX_ORDER | 24 | #ifndef IOREMAP_MAX_ORDER |
25 | #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ | 25 | #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | struct vm_struct { | 28 | struct vm_struct { |
29 | struct vm_struct *next; | 29 | struct vm_struct *next; |
30 | void *addr; | 30 | void *addr; |
31 | unsigned long size; | 31 | unsigned long size; |
32 | unsigned long flags; | 32 | unsigned long flags; |
33 | struct page **pages; | 33 | struct page **pages; |
34 | unsigned int nr_pages; | 34 | unsigned int nr_pages; |
35 | phys_addr_t phys_addr; | 35 | phys_addr_t phys_addr; |
36 | void *caller; | 36 | void *caller; |
37 | }; | 37 | }; |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Highlevel APIs for driver use | 40 | * Highlevel APIs for driver use |
41 | */ | 41 | */ |
42 | extern void vm_unmap_ram(const void *mem, unsigned int count); | 42 | extern void vm_unmap_ram(const void *mem, unsigned int count); |
43 | extern void *vm_map_ram(struct page **pages, unsigned int count, | 43 | extern void *vm_map_ram(struct page **pages, unsigned int count, |
44 | int node, pgprot_t prot); | 44 | int node, pgprot_t prot); |
45 | extern void vm_unmap_aliases(void); | 45 | extern void vm_unmap_aliases(void); |
46 | 46 | ||
47 | #ifdef CONFIG_MMU | 47 | #ifdef CONFIG_MMU |
48 | extern void __init vmalloc_init(void); | 48 | extern void __init vmalloc_init(void); |
49 | #else | 49 | #else |
50 | static inline void vmalloc_init(void) | 50 | static inline void vmalloc_init(void) |
51 | { | 51 | { |
52 | } | 52 | } |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | extern void *vmalloc(unsigned long size); | 55 | extern void *vmalloc(unsigned long size); |
56 | extern void *vzalloc(unsigned long size); | ||
56 | extern void *vmalloc_user(unsigned long size); | 57 | extern void *vmalloc_user(unsigned long size); |
57 | extern void *vmalloc_node(unsigned long size, int node); | 58 | extern void *vmalloc_node(unsigned long size, int node); |
59 | extern void *vzalloc_node(unsigned long size, int node); | ||
58 | extern void *vmalloc_exec(unsigned long size); | 60 | extern void *vmalloc_exec(unsigned long size); |
59 | extern void *vmalloc_32(unsigned long size); | 61 | extern void *vmalloc_32(unsigned long size); |
60 | extern void *vmalloc_32_user(unsigned long size); | 62 | extern void *vmalloc_32_user(unsigned long size); |
61 | extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); | 63 | extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); |
62 | extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, | 64 | extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, |
63 | pgprot_t prot); | 65 | pgprot_t prot); |
64 | extern void vfree(const void *addr); | 66 | extern void vfree(const void *addr); |
65 | 67 | ||
66 | extern void *vmap(struct page **pages, unsigned int count, | 68 | extern void *vmap(struct page **pages, unsigned int count, |
67 | unsigned long flags, pgprot_t prot); | 69 | unsigned long flags, pgprot_t prot); |
68 | extern void vunmap(const void *addr); | 70 | extern void vunmap(const void *addr); |
69 | 71 | ||
70 | extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, | 72 | extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, |
71 | unsigned long pgoff); | 73 | unsigned long pgoff); |
72 | void vmalloc_sync_all(void); | 74 | void vmalloc_sync_all(void); |
73 | 75 | ||
74 | /* | 76 | /* |
75 | * Lowlevel-APIs (not for driver use!) | 77 | * Lowlevel-APIs (not for driver use!) |
76 | */ | 78 | */ |
77 | 79 | ||
78 | static inline size_t get_vm_area_size(const struct vm_struct *area) | 80 | static inline size_t get_vm_area_size(const struct vm_struct *area) |
79 | { | 81 | { |
80 | /* return actual size without guard page */ | 82 | /* return actual size without guard page */ |
81 | return area->size - PAGE_SIZE; | 83 | return area->size - PAGE_SIZE; |
82 | } | 84 | } |
83 | 85 | ||
84 | extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); | 86 | extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); |
85 | extern struct vm_struct *get_vm_area_caller(unsigned long size, | 87 | extern struct vm_struct *get_vm_area_caller(unsigned long size, |
86 | unsigned long flags, void *caller); | 88 | unsigned long flags, void *caller); |
87 | extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | 89 | extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, |
88 | unsigned long start, unsigned long end); | 90 | unsigned long start, unsigned long end); |
89 | extern struct vm_struct *__get_vm_area_caller(unsigned long size, | 91 | extern struct vm_struct *__get_vm_area_caller(unsigned long size, |
90 | unsigned long flags, | 92 | unsigned long flags, |
91 | unsigned long start, unsigned long end, | 93 | unsigned long start, unsigned long end, |
92 | void *caller); | 94 | void *caller); |
93 | extern struct vm_struct *get_vm_area_node(unsigned long size, | 95 | extern struct vm_struct *get_vm_area_node(unsigned long size, |
94 | unsigned long flags, int node, | 96 | unsigned long flags, int node, |
95 | gfp_t gfp_mask); | 97 | gfp_t gfp_mask); |
96 | extern struct vm_struct *remove_vm_area(const void *addr); | 98 | extern struct vm_struct *remove_vm_area(const void *addr); |
97 | 99 | ||
98 | extern int map_vm_area(struct vm_struct *area, pgprot_t prot, | 100 | extern int map_vm_area(struct vm_struct *area, pgprot_t prot, |
99 | struct page ***pages); | 101 | struct page ***pages); |
100 | extern int map_kernel_range_noflush(unsigned long start, unsigned long size, | 102 | extern int map_kernel_range_noflush(unsigned long start, unsigned long size, |
101 | pgprot_t prot, struct page **pages); | 103 | pgprot_t prot, struct page **pages); |
102 | extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); | 104 | extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); |
103 | extern void unmap_kernel_range(unsigned long addr, unsigned long size); | 105 | extern void unmap_kernel_range(unsigned long addr, unsigned long size); |
104 | 106 | ||
105 | /* Allocate/destroy a 'vmalloc' VM area. */ | 107 | /* Allocate/destroy a 'vmalloc' VM area. */ |
106 | extern struct vm_struct *alloc_vm_area(size_t size); | 108 | extern struct vm_struct *alloc_vm_area(size_t size); |
107 | extern void free_vm_area(struct vm_struct *area); | 109 | extern void free_vm_area(struct vm_struct *area); |
108 | 110 | ||
109 | /* for /dev/kmem */ | 111 | /* for /dev/kmem */ |
110 | extern long vread(char *buf, char *addr, unsigned long count); | 112 | extern long vread(char *buf, char *addr, unsigned long count); |
111 | extern long vwrite(char *buf, char *addr, unsigned long count); | 113 | extern long vwrite(char *buf, char *addr, unsigned long count); |
112 | 114 | ||
113 | /* | 115 | /* |
114 | * Internals. Dont't use.. | 116 | * Internals. Dont't use.. |
115 | */ | 117 | */ |
116 | extern rwlock_t vmlist_lock; | 118 | extern rwlock_t vmlist_lock; |
117 | extern struct vm_struct *vmlist; | 119 | extern struct vm_struct *vmlist; |
118 | extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); | 120 | extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); |
119 | 121 | ||
120 | #ifdef CONFIG_SMP | 122 | #ifdef CONFIG_SMP |
121 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | 123 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, |
122 | const size_t *sizes, int nr_vms, | 124 | const size_t *sizes, int nr_vms, |
123 | size_t align, gfp_t gfp_mask); | 125 | size_t align, gfp_t gfp_mask); |
124 | 126 | ||
125 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); | 127 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); |
126 | #endif | 128 | #endif |
127 | 129 | ||
128 | #endif /* _LINUX_VMALLOC_H */ | 130 | #endif /* _LINUX_VMALLOC_H */ |
129 | 131 |
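The MMU-side implementation lives in mm/vmalloc.c, the third changed file, which is not reproduced in this excerpt. Assuming it simply forwards __GFP_ZERO to the existing vmalloc machinery, as the nommu version in mm/nommu.c below does, a minimal sketch would be:

```c
/*
 * Hedged sketch only -- not the actual mm/vmalloc.c hunk from this commit.
 * It assumes vzalloc() passes __GFP_ZERO straight through to __vmalloc(),
 * mirroring the nommu implementation shown below.
 */
void *vzalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			 PAGE_KERNEL);
}
```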
mm/nommu.c
1 | /* | 1 | /* |
2 | * linux/mm/nommu.c | 2 | * linux/mm/nommu.c |
3 | * | 3 | * |
4 | * Replacement code for mm functions to support CPU's that don't | 4 | * Replacement code for mm functions to support CPU's that don't |
5 | * have any form of memory management unit (thus no virtual memory). | 5 | * have any form of memory management unit (thus no virtual memory). |
6 | * | 6 | * |
7 | * See Documentation/nommu-mmap.txt | 7 | * See Documentation/nommu-mmap.txt |
8 | * | 8 | * |
9 | * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> | 9 | * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com> |
10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> | 10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> |
11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> | 11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> |
12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> | 12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> |
13 | * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org> | 13 | * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org> |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/mman.h> | 18 | #include <linux/mman.h> |
19 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
20 | #include <linux/file.h> | 20 | #include <linux/file.h> |
21 | #include <linux/highmem.h> | 21 | #include <linux/highmem.h> |
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | 24 | #include <linux/vmalloc.h> |
25 | #include <linux/tracehook.h> | 25 | #include <linux/tracehook.h> |
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
29 | #include <linux/personality.h> | 29 | #include <linux/personality.h> |
30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
31 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
32 | 32 | ||
33 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
34 | #include <asm/tlb.h> | 34 | #include <asm/tlb.h> |
35 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
36 | #include <asm/mmu_context.h> | 36 | #include <asm/mmu_context.h> |
37 | #include "internal.h" | 37 | #include "internal.h" |
38 | 38 | ||
39 | #if 0 | 39 | #if 0 |
40 | #define kenter(FMT, ...) \ | 40 | #define kenter(FMT, ...) \ |
41 | printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) | 41 | printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) |
42 | #define kleave(FMT, ...) \ | 42 | #define kleave(FMT, ...) \ |
43 | printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) | 43 | printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) |
44 | #define kdebug(FMT, ...) \ | 44 | #define kdebug(FMT, ...) \ |
45 | printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) | 45 | printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) |
46 | #else | 46 | #else |
47 | #define kenter(FMT, ...) \ | 47 | #define kenter(FMT, ...) \ |
48 | no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) | 48 | no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) |
49 | #define kleave(FMT, ...) \ | 49 | #define kleave(FMT, ...) \ |
50 | no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) | 50 | no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) |
51 | #define kdebug(FMT, ...) \ | 51 | #define kdebug(FMT, ...) \ |
52 | no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) | 52 | no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | void *high_memory; | 55 | void *high_memory; |
56 | struct page *mem_map; | 56 | struct page *mem_map; |
57 | unsigned long max_mapnr; | 57 | unsigned long max_mapnr; |
58 | unsigned long num_physpages; | 58 | unsigned long num_physpages; |
59 | unsigned long highest_memmap_pfn; | 59 | unsigned long highest_memmap_pfn; |
60 | struct percpu_counter vm_committed_as; | 60 | struct percpu_counter vm_committed_as; |
61 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ | 61 | int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ |
62 | int sysctl_overcommit_ratio = 50; /* default is 50% */ | 62 | int sysctl_overcommit_ratio = 50; /* default is 50% */ |
63 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; | 63 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; |
64 | int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; | 64 | int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; |
65 | int heap_stack_gap = 0; | 65 | int heap_stack_gap = 0; |
66 | 66 | ||
67 | atomic_long_t mmap_pages_allocated; | 67 | atomic_long_t mmap_pages_allocated; |
68 | 68 | ||
69 | EXPORT_SYMBOL(mem_map); | 69 | EXPORT_SYMBOL(mem_map); |
70 | EXPORT_SYMBOL(num_physpages); | 70 | EXPORT_SYMBOL(num_physpages); |
71 | 71 | ||
72 | /* list of mapped, potentially shareable regions */ | 72 | /* list of mapped, potentially shareable regions */ |
73 | static struct kmem_cache *vm_region_jar; | 73 | static struct kmem_cache *vm_region_jar; |
74 | struct rb_root nommu_region_tree = RB_ROOT; | 74 | struct rb_root nommu_region_tree = RB_ROOT; |
75 | DECLARE_RWSEM(nommu_region_sem); | 75 | DECLARE_RWSEM(nommu_region_sem); |
76 | 76 | ||
77 | const struct vm_operations_struct generic_file_vm_ops = { | 77 | const struct vm_operations_struct generic_file_vm_ops = { |
78 | }; | 78 | }; |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Return the total memory allocated for this pointer, not | 81 | * Return the total memory allocated for this pointer, not |
82 | * just what the caller asked for. | 82 | * just what the caller asked for. |
83 | * | 83 | * |
84 | * Doesn't have to be accurate, i.e. may have races. | 84 | * Doesn't have to be accurate, i.e. may have races. |
85 | */ | 85 | */ |
86 | unsigned int kobjsize(const void *objp) | 86 | unsigned int kobjsize(const void *objp) |
87 | { | 87 | { |
88 | struct page *page; | 88 | struct page *page; |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * If the object we have should not have ksize performed on it, | 91 | * If the object we have should not have ksize performed on it, |
92 | * return size of 0 | 92 | * return size of 0 |
93 | */ | 93 | */ |
94 | if (!objp || !virt_addr_valid(objp)) | 94 | if (!objp || !virt_addr_valid(objp)) |
95 | return 0; | 95 | return 0; |
96 | 96 | ||
97 | page = virt_to_head_page(objp); | 97 | page = virt_to_head_page(objp); |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * If the allocator sets PageSlab, we know the pointer came from | 100 | * If the allocator sets PageSlab, we know the pointer came from |
101 | * kmalloc(). | 101 | * kmalloc(). |
102 | */ | 102 | */ |
103 | if (PageSlab(page)) | 103 | if (PageSlab(page)) |
104 | return ksize(objp); | 104 | return ksize(objp); |
105 | 105 | ||
106 | /* | 106 | /* |
107 | * If it's not a compound page, see if we have a matching VMA | 107 | * If it's not a compound page, see if we have a matching VMA |
108 | * region. This test is intentionally done in reverse order, | 108 | * region. This test is intentionally done in reverse order, |
109 | * so if there's no VMA, we still fall through and hand back | 109 | * so if there's no VMA, we still fall through and hand back |
110 | * PAGE_SIZE for 0-order pages. | 110 | * PAGE_SIZE for 0-order pages. |
111 | */ | 111 | */ |
112 | if (!PageCompound(page)) { | 112 | if (!PageCompound(page)) { |
113 | struct vm_area_struct *vma; | 113 | struct vm_area_struct *vma; |
114 | 114 | ||
115 | vma = find_vma(current->mm, (unsigned long)objp); | 115 | vma = find_vma(current->mm, (unsigned long)objp); |
116 | if (vma) | 116 | if (vma) |
117 | return vma->vm_end - vma->vm_start; | 117 | return vma->vm_end - vma->vm_start; |
118 | } | 118 | } |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * The ksize() function is only guaranteed to work for pointers | 121 | * The ksize() function is only guaranteed to work for pointers |
122 | * returned by kmalloc(). So handle arbitrary pointers here. | 122 | * returned by kmalloc(). So handle arbitrary pointers here. |
123 | */ | 123 | */ |
124 | return PAGE_SIZE << compound_order(page); | 124 | return PAGE_SIZE << compound_order(page); |
125 | } | 125 | } |
126 | 126 | ||
127 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 127 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
128 | unsigned long start, int nr_pages, unsigned int foll_flags, | 128 | unsigned long start, int nr_pages, unsigned int foll_flags, |
129 | struct page **pages, struct vm_area_struct **vmas) | 129 | struct page **pages, struct vm_area_struct **vmas) |
130 | { | 130 | { |
131 | struct vm_area_struct *vma; | 131 | struct vm_area_struct *vma; |
132 | unsigned long vm_flags; | 132 | unsigned long vm_flags; |
133 | int i; | 133 | int i; |
134 | 134 | ||
135 | /* calculate required read or write permissions. | 135 | /* calculate required read or write permissions. |
136 | * If FOLL_FORCE is set, we only require the "MAY" flags. | 136 | * If FOLL_FORCE is set, we only require the "MAY" flags. |
137 | */ | 137 | */ |
138 | vm_flags = (foll_flags & FOLL_WRITE) ? | 138 | vm_flags = (foll_flags & FOLL_WRITE) ? |
139 | (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); | 139 | (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); |
140 | vm_flags &= (foll_flags & FOLL_FORCE) ? | 140 | vm_flags &= (foll_flags & FOLL_FORCE) ? |
141 | (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); | 141 | (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); |
142 | 142 | ||
143 | for (i = 0; i < nr_pages; i++) { | 143 | for (i = 0; i < nr_pages; i++) { |
144 | vma = find_vma(mm, start); | 144 | vma = find_vma(mm, start); |
145 | if (!vma) | 145 | if (!vma) |
146 | goto finish_or_fault; | 146 | goto finish_or_fault; |
147 | 147 | ||
148 | /* protect what we can, including chardevs */ | 148 | /* protect what we can, including chardevs */ |
149 | if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || | 149 | if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || |
150 | !(vm_flags & vma->vm_flags)) | 150 | !(vm_flags & vma->vm_flags)) |
151 | goto finish_or_fault; | 151 | goto finish_or_fault; |
152 | 152 | ||
153 | if (pages) { | 153 | if (pages) { |
154 | pages[i] = virt_to_page(start); | 154 | pages[i] = virt_to_page(start); |
155 | if (pages[i]) | 155 | if (pages[i]) |
156 | page_cache_get(pages[i]); | 156 | page_cache_get(pages[i]); |
157 | } | 157 | } |
158 | if (vmas) | 158 | if (vmas) |
159 | vmas[i] = vma; | 159 | vmas[i] = vma; |
160 | start = (start + PAGE_SIZE) & PAGE_MASK; | 160 | start = (start + PAGE_SIZE) & PAGE_MASK; |
161 | } | 161 | } |
162 | 162 | ||
163 | return i; | 163 | return i; |
164 | 164 | ||
165 | finish_or_fault: | 165 | finish_or_fault: |
166 | return i ? : -EFAULT; | 166 | return i ? : -EFAULT; |
167 | } | 167 | } |
168 | 168 | ||
169 | /* | 169 | /* |
170 | * get a list of pages in an address range belonging to the specified process | 170 | * get a list of pages in an address range belonging to the specified process |
171 | * and indicate the VMA that covers each page | 171 | * and indicate the VMA that covers each page |
172 | * - this is potentially dodgy as we may end incrementing the page count of a | 172 | * - this is potentially dodgy as we may end incrementing the page count of a |
173 | * slab page or a secondary page from a compound page | 173 | * slab page or a secondary page from a compound page |
174 | * - don't permit access to VMAs that don't support it, such as I/O mappings | 174 | * - don't permit access to VMAs that don't support it, such as I/O mappings |
175 | */ | 175 | */ |
176 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 176 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
177 | unsigned long start, int nr_pages, int write, int force, | 177 | unsigned long start, int nr_pages, int write, int force, |
178 | struct page **pages, struct vm_area_struct **vmas) | 178 | struct page **pages, struct vm_area_struct **vmas) |
179 | { | 179 | { |
180 | int flags = 0; | 180 | int flags = 0; |
181 | 181 | ||
182 | if (write) | 182 | if (write) |
183 | flags |= FOLL_WRITE; | 183 | flags |= FOLL_WRITE; |
184 | if (force) | 184 | if (force) |
185 | flags |= FOLL_FORCE; | 185 | flags |= FOLL_FORCE; |
186 | 186 | ||
187 | return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); | 187 | return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); |
188 | } | 188 | } |
189 | EXPORT_SYMBOL(get_user_pages); | 189 | EXPORT_SYMBOL(get_user_pages); |
190 | 190 | ||
191 | /** | 191 | /** |
192 | * follow_pfn - look up PFN at a user virtual address | 192 | * follow_pfn - look up PFN at a user virtual address |
193 | * @vma: memory mapping | 193 | * @vma: memory mapping |
194 | * @address: user virtual address | 194 | * @address: user virtual address |
195 | * @pfn: location to store found PFN | 195 | * @pfn: location to store found PFN |
196 | * | 196 | * |
197 | * Only IO mappings and raw PFN mappings are allowed. | 197 | * Only IO mappings and raw PFN mappings are allowed. |
198 | * | 198 | * |
199 | * Returns zero and the pfn at @pfn on success, -ve otherwise. | 199 | * Returns zero and the pfn at @pfn on success, -ve otherwise. |
200 | */ | 200 | */ |
201 | int follow_pfn(struct vm_area_struct *vma, unsigned long address, | 201 | int follow_pfn(struct vm_area_struct *vma, unsigned long address, |
202 | unsigned long *pfn) | 202 | unsigned long *pfn) |
203 | { | 203 | { |
204 | if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) | 204 | if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) |
205 | return -EINVAL; | 205 | return -EINVAL; |
206 | 206 | ||
207 | *pfn = address >> PAGE_SHIFT; | 207 | *pfn = address >> PAGE_SHIFT; |
208 | return 0; | 208 | return 0; |
209 | } | 209 | } |
210 | EXPORT_SYMBOL(follow_pfn); | 210 | EXPORT_SYMBOL(follow_pfn); |
211 | 211 | ||
212 | DEFINE_RWLOCK(vmlist_lock); | 212 | DEFINE_RWLOCK(vmlist_lock); |
213 | struct vm_struct *vmlist; | 213 | struct vm_struct *vmlist; |
214 | 214 | ||
215 | void vfree(const void *addr) | 215 | void vfree(const void *addr) |
216 | { | 216 | { |
217 | kfree(addr); | 217 | kfree(addr); |
218 | } | 218 | } |
219 | EXPORT_SYMBOL(vfree); | 219 | EXPORT_SYMBOL(vfree); |
220 | 220 | ||
221 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | 221 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) |
222 | { | 222 | { |
223 | /* | 223 | /* |
224 | * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() | 224 | * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() |
225 | * returns only a logical address. | 225 | * returns only a logical address. |
226 | */ | 226 | */ |
227 | return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); | 227 | return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); |
228 | } | 228 | } |
229 | EXPORT_SYMBOL(__vmalloc); | 229 | EXPORT_SYMBOL(__vmalloc); |
230 | 230 | ||
231 | void *vmalloc_user(unsigned long size) | 231 | void *vmalloc_user(unsigned long size) |
232 | { | 232 | { |
233 | void *ret; | 233 | void *ret; |
234 | 234 | ||
235 | ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | 235 | ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, |
236 | PAGE_KERNEL); | 236 | PAGE_KERNEL); |
237 | if (ret) { | 237 | if (ret) { |
238 | struct vm_area_struct *vma; | 238 | struct vm_area_struct *vma; |
239 | 239 | ||
240 | down_write(¤t->mm->mmap_sem); | 240 | down_write(¤t->mm->mmap_sem); |
241 | vma = find_vma(current->mm, (unsigned long)ret); | 241 | vma = find_vma(current->mm, (unsigned long)ret); |
242 | if (vma) | 242 | if (vma) |
243 | vma->vm_flags |= VM_USERMAP; | 243 | vma->vm_flags |= VM_USERMAP; |
244 | up_write(¤t->mm->mmap_sem); | 244 | up_write(¤t->mm->mmap_sem); |
245 | } | 245 | } |
246 | 246 | ||
247 | return ret; | 247 | return ret; |
248 | } | 248 | } |
249 | EXPORT_SYMBOL(vmalloc_user); | 249 | EXPORT_SYMBOL(vmalloc_user); |
250 | 250 | ||
251 | struct page *vmalloc_to_page(const void *addr) | 251 | struct page *vmalloc_to_page(const void *addr) |
252 | { | 252 | { |
253 | return virt_to_page(addr); | 253 | return virt_to_page(addr); |
254 | } | 254 | } |
255 | EXPORT_SYMBOL(vmalloc_to_page); | 255 | EXPORT_SYMBOL(vmalloc_to_page); |
256 | 256 | ||
257 | unsigned long vmalloc_to_pfn(const void *addr) | 257 | unsigned long vmalloc_to_pfn(const void *addr) |
258 | { | 258 | { |
259 | return page_to_pfn(virt_to_page(addr)); | 259 | return page_to_pfn(virt_to_page(addr)); |
260 | } | 260 | } |
261 | EXPORT_SYMBOL(vmalloc_to_pfn); | 261 | EXPORT_SYMBOL(vmalloc_to_pfn); |
262 | 262 | ||
263 | long vread(char *buf, char *addr, unsigned long count) | 263 | long vread(char *buf, char *addr, unsigned long count) |
264 | { | 264 | { |
265 | memcpy(buf, addr, count); | 265 | memcpy(buf, addr, count); |
266 | return count; | 266 | return count; |
267 | } | 267 | } |
268 | 268 | ||
269 | long vwrite(char *buf, char *addr, unsigned long count) | 269 | long vwrite(char *buf, char *addr, unsigned long count) |
270 | { | 270 | { |
271 | /* Don't allow overflow */ | 271 | /* Don't allow overflow */ |
272 | if ((unsigned long) addr + count < count) | 272 | if ((unsigned long) addr + count < count) |
273 | count = -(unsigned long) addr; | 273 | count = -(unsigned long) addr; |
274 | 274 | ||
275 | memcpy(addr, buf, count); | 275 | memcpy(addr, buf, count); |
276 | return(count); | 276 | return(count); |
277 | } | 277 | } |
278 | 278 | ||
279 | /* | 279 | /* |
280 | * vmalloc - allocate virtually continguos memory | 280 | * vmalloc - allocate virtually continguos memory |
281 | * | 281 | * |
282 | * @size: allocation size | 282 | * @size: allocation size |
283 | * | 283 | * |
284 | * Allocate enough pages to cover @size from the page level | 284 | * Allocate enough pages to cover @size from the page level |
285 | * allocator and map them into continguos kernel virtual space. | 285 | * allocator and map them into continguos kernel virtual space. |
286 | * | 286 | * |
287 | * For tight control over page level allocator and protection flags | 287 | * For tight control over page level allocator and protection flags |
288 | * use __vmalloc() instead. | 288 | * use __vmalloc() instead. |
289 | */ | 289 | */ |
290 | void *vmalloc(unsigned long size) | 290 | void *vmalloc(unsigned long size) |
291 | { | 291 | { |
292 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); | 292 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); |
293 | } | 293 | } |
294 | EXPORT_SYMBOL(vmalloc); | 294 | EXPORT_SYMBOL(vmalloc); |
295 | 295 | ||
296 | /* | ||
297 | * vzalloc - allocate virtually continguos memory with zero fill | ||
298 | * | ||
299 | * @size: allocation size | ||
300 | * | ||
301 | * Allocate enough pages to cover @size from the page level | ||
302 | * allocator and map them into continguos kernel virtual space. | ||
303 | * The memory allocated is set to zero. | ||
304 | * | ||
305 | * For tight control over page level allocator and protection flags | ||
306 | * use __vmalloc() instead. | ||
307 | */ | ||
308 | void *vzalloc(unsigned long size) | ||
309 | { | ||
310 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | ||
311 | PAGE_KERNEL); | ||
312 | } | ||
313 | EXPORT_SYMBOL(vzalloc); | ||
314 | |||
315 | /** | ||
316 | * vmalloc_node - allocate memory on a specific node | ||
317 | * @size: allocation size | ||
318 | * @node: numa node | ||
319 | * | ||
320 | * Allocate enough pages to cover @size from the page level | ||
321 | * allocator and map them into contiguous kernel virtual space. | ||
322 | * | ||
323 | * For tight control over page level allocator and protection flags | ||
324 | * use __vmalloc() instead. | ||
325 | */ | ||
296 | void *vmalloc_node(unsigned long size, int node) | 326 | void *vmalloc_node(unsigned long size, int node) |
297 | { | 327 | { |
298 | return vmalloc(size); | 328 | return vmalloc(size); |
299 | } | 329 | } |
300 | EXPORT_SYMBOL(vmalloc_node); | 330 | |
331 | /** | ||
332 | * vzalloc_node - allocate memory on a specific node with zero fill | ||
333 | * @size: allocation size | ||
334 | * @node: numa node | ||
335 | * | ||
336 | * Allocate enough pages to cover @size from the page level | ||
337 | * allocator and map them into contiguous kernel virtual space. | ||
338 | * The memory allocated is set to zero. | ||
339 | * | ||
340 | * For tight control over page level allocator and protection flags | ||
341 | * use __vmalloc() instead. | ||
342 | */ | ||
343 | void *vzalloc_node(unsigned long size, int node) | ||
344 | { | ||
345 | return vzalloc(size); | ||
346 | } | ||
347 | EXPORT_SYMBOL(vzalloc_node); | ||
301 | 348 | ||
302 | #ifndef PAGE_KERNEL_EXEC | 349 | #ifndef PAGE_KERNEL_EXEC |
303 | # define PAGE_KERNEL_EXEC PAGE_KERNEL | 350 | # define PAGE_KERNEL_EXEC PAGE_KERNEL |
304 | #endif | 351 | #endif |
305 | 352 | ||
306 | /** | 353 | /** |
307 | * vmalloc_exec - allocate virtually contiguous, executable memory | 354 | * vmalloc_exec - allocate virtually contiguous, executable memory |
308 | * @size: allocation size | 355 | * @size: allocation size |
309 | * | 356 | * |
310 | * Kernel-internal function to allocate enough pages to cover @size | 357 | * Kernel-internal function to allocate enough pages to cover @size |
311 | * the page level allocator and map them into contiguous and | 358 | * the page level allocator and map them into contiguous and |
312 | * executable kernel virtual space. | 359 | * executable kernel virtual space. |
313 | * | 360 | * |
314 | * For tight control over page level allocator and protection flags | 361 | * For tight control over page level allocator and protection flags |
315 | * use __vmalloc() instead. | 362 | * use __vmalloc() instead. |
316 | */ | 363 | */ |
317 | 364 | ||
318 | void *vmalloc_exec(unsigned long size) | 365 | void *vmalloc_exec(unsigned long size) |
319 | { | 366 | { |
320 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); | 367 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); |
321 | } | 368 | } |
322 | 369 | ||
323 | /** | 370 | /** |
324 | * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) | 371 | * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) |
325 | * @size: allocation size | 372 | * @size: allocation size |
326 | * | 373 | * |
327 | * Allocate enough 32bit PA addressable pages to cover @size from the | 374 | * Allocate enough 32bit PA addressable pages to cover @size from the |
328 | * page level allocator and map them into continguos kernel virtual space. | 375 | * page level allocator and map them into continguos kernel virtual space. |
329 | */ | 376 | */ |
330 | void *vmalloc_32(unsigned long size) | 377 | void *vmalloc_32(unsigned long size) |
331 | { | 378 | { |
332 | return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); | 379 | return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); |
333 | } | 380 | } |
334 | EXPORT_SYMBOL(vmalloc_32); | 381 | EXPORT_SYMBOL(vmalloc_32); |
335 | 382 | ||
336 | /** | 383 | /** |
337 | * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory | 384 | * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory |
338 | * @size: allocation size | 385 | * @size: allocation size |
339 | * | 386 | * |
340 | * The resulting memory area is 32bit addressable and zeroed so it can be | 387 | * The resulting memory area is 32bit addressable and zeroed so it can be |
341 | * mapped to userspace without leaking data. | 388 | * mapped to userspace without leaking data. |
342 | * | 389 | * |
343 | * VM_USERMAP is set on the corresponding VMA so that subsequent calls to | 390 | * VM_USERMAP is set on the corresponding VMA so that subsequent calls to |
344 | * remap_vmalloc_range() are permissible. | 391 | * remap_vmalloc_range() are permissible. |
345 | */ | 392 | */ |
346 | void *vmalloc_32_user(unsigned long size) | 393 | void *vmalloc_32_user(unsigned long size) |
347 | { | 394 | { |
348 | /* | 395 | /* |
349 | * We'll have to sort out the ZONE_DMA bits for 64-bit, | 396 | * We'll have to sort out the ZONE_DMA bits for 64-bit, |
350 | * but for now this can simply use vmalloc_user() directly. | 397 | * but for now this can simply use vmalloc_user() directly. |
351 | */ | 398 | */ |
352 | return vmalloc_user(size); | 399 | return vmalloc_user(size); |
353 | } | 400 | } |
354 | EXPORT_SYMBOL(vmalloc_32_user); | 401 | EXPORT_SYMBOL(vmalloc_32_user); |
355 | 402 | ||
356 | void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) | 403 | void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) |
357 | { | 404 | { |
358 | BUG(); | 405 | BUG(); |
359 | return NULL; | 406 | return NULL; |
360 | } | 407 | } |
361 | EXPORT_SYMBOL(vmap); | 408 | EXPORT_SYMBOL(vmap); |
362 | 409 | ||
363 | void vunmap(const void *addr) | 410 | void vunmap(const void *addr) |
364 | { | 411 | { |
365 | BUG(); | 412 | BUG(); |
366 | } | 413 | } |
367 | EXPORT_SYMBOL(vunmap); | 414 | EXPORT_SYMBOL(vunmap); |
368 | 415 | ||
369 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) | 416 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) |
370 | { | 417 | { |
371 | BUG(); | 418 | BUG(); |
372 | return NULL; | 419 | return NULL; |
373 | } | 420 | } |
374 | EXPORT_SYMBOL(vm_map_ram); | 421 | EXPORT_SYMBOL(vm_map_ram); |
375 | 422 | ||
376 | void vm_unmap_ram(const void *mem, unsigned int count) | 423 | void vm_unmap_ram(const void *mem, unsigned int count) |
377 | { | 424 | { |
378 | BUG(); | 425 | BUG(); |
379 | } | 426 | } |
380 | EXPORT_SYMBOL(vm_unmap_ram); | 427 | EXPORT_SYMBOL(vm_unmap_ram); |
381 | 428 | ||
382 | void vm_unmap_aliases(void) | 429 | void vm_unmap_aliases(void) |
383 | { | 430 | { |
384 | } | 431 | } |
385 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); | 432 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); |
386 | 433 | ||
387 | /* | 434 | /* |
388 | * Implement a stub for vmalloc_sync_all() if the architecture chose not to | 435 | * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
389 | * have one. | 436 | * have one. |
390 | */ | 437 | */ |
391 | void __attribute__((weak)) vmalloc_sync_all(void) | 438 | void __attribute__((weak)) vmalloc_sync_all(void) |
392 | { | 439 | { |
393 | } | 440 | } |
394 | 441 | ||
395 | int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, | 442 | int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, |
396 | struct page *page) | 443 | struct page *page) |
397 | { | 444 | { |
398 | return -EINVAL; | 445 | return -EINVAL; |
399 | } | 446 | } |
400 | EXPORT_SYMBOL(vm_insert_page); | 447 | EXPORT_SYMBOL(vm_insert_page); |
401 | 448 | ||
402 | /* | 449 | /* |
403 | * sys_brk() for the most part doesn't need the global kernel | 450 | * sys_brk() for the most part doesn't need the global kernel |
404 | * lock, except when an application is doing something nasty | 451 | * lock, except when an application is doing something nasty |
405 | * like trying to un-brk an area that has already been mapped | 452 | * like trying to un-brk an area that has already been mapped |
406 | * to a regular file. in this case, the unmapping will need | 453 | * to a regular file. in this case, the unmapping will need |
407 | * to invoke file system routines that need the global lock. | 454 | * to invoke file system routines that need the global lock. |
408 | */ | 455 | */ |
409 | SYSCALL_DEFINE1(brk, unsigned long, brk) | 456 | SYSCALL_DEFINE1(brk, unsigned long, brk) |
410 | { | 457 | { |
411 | struct mm_struct *mm = current->mm; | 458 | struct mm_struct *mm = current->mm; |
412 | 459 | ||
413 | if (brk < mm->start_brk || brk > mm->context.end_brk) | 460 | if (brk < mm->start_brk || brk > mm->context.end_brk) |
414 | return mm->brk; | 461 | return mm->brk; |
415 | 462 | ||
416 | if (mm->brk == brk) | 463 | if (mm->brk == brk) |
417 | return mm->brk; | 464 | return mm->brk; |
418 | 465 | ||
419 | /* | 466 | /* |
420 | * Always allow shrinking brk | 467 | * Always allow shrinking brk |
421 | */ | 468 | */ |
422 | if (brk <= mm->brk) { | 469 | if (brk <= mm->brk) { |
423 | mm->brk = brk; | 470 | mm->brk = brk; |
424 | return brk; | 471 | return brk; |
425 | } | 472 | } |
426 | 473 | ||
427 | /* | 474 | /* |
428 | * Ok, looks good - let it rip. | 475 | * Ok, looks good - let it rip. |
429 | */ | 476 | */ |
430 | flush_icache_range(mm->brk, brk); | 477 | flush_icache_range(mm->brk, brk); |
431 | return mm->brk = brk; | 478 | return mm->brk = brk; |
432 | } | 479 | } |
433 | 480 | ||
434 | /* | 481 | /* |
435 | * initialise the VMA and region record slabs | 482 | * initialise the VMA and region record slabs |
436 | */ | 483 | */ |
437 | void __init mmap_init(void) | 484 | void __init mmap_init(void) |
438 | { | 485 | { |
439 | int ret; | 486 | int ret; |
440 | 487 | ||
441 | ret = percpu_counter_init(&vm_committed_as, 0); | 488 | ret = percpu_counter_init(&vm_committed_as, 0); |
442 | VM_BUG_ON(ret); | 489 | VM_BUG_ON(ret); |
443 | vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); | 490 | vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); |
444 | } | 491 | } |
445 | 492 | ||
446 | /* | 493 | /* |
447 | * validate the region tree | 494 | * validate the region tree |
448 | * - the caller must hold the region lock | 495 | * - the caller must hold the region lock |
449 | */ | 496 | */ |
450 | #ifdef CONFIG_DEBUG_NOMMU_REGIONS | 497 | #ifdef CONFIG_DEBUG_NOMMU_REGIONS |
451 | static noinline void validate_nommu_regions(void) | 498 | static noinline void validate_nommu_regions(void) |
452 | { | 499 | { |
453 | struct vm_region *region, *last; | 500 | struct vm_region *region, *last; |
454 | struct rb_node *p, *lastp; | 501 | struct rb_node *p, *lastp; |
455 | 502 | ||
456 | lastp = rb_first(&nommu_region_tree); | 503 | lastp = rb_first(&nommu_region_tree); |
457 | if (!lastp) | 504 | if (!lastp) |
458 | return; | 505 | return; |
459 | 506 | ||
460 | last = rb_entry(lastp, struct vm_region, vm_rb); | 507 | last = rb_entry(lastp, struct vm_region, vm_rb); |
461 | BUG_ON(unlikely(last->vm_end <= last->vm_start)); | 508 | BUG_ON(unlikely(last->vm_end <= last->vm_start)); |
462 | BUG_ON(unlikely(last->vm_top < last->vm_end)); | 509 | BUG_ON(unlikely(last->vm_top < last->vm_end)); |
463 | 510 | ||
464 | while ((p = rb_next(lastp))) { | 511 | while ((p = rb_next(lastp))) { |
465 | region = rb_entry(p, struct vm_region, vm_rb); | 512 | region = rb_entry(p, struct vm_region, vm_rb); |
466 | last = rb_entry(lastp, struct vm_region, vm_rb); | 513 | last = rb_entry(lastp, struct vm_region, vm_rb); |
467 | 514 | ||
468 | BUG_ON(unlikely(region->vm_end <= region->vm_start)); | 515 | BUG_ON(unlikely(region->vm_end <= region->vm_start)); |
469 | BUG_ON(unlikely(region->vm_top < region->vm_end)); | 516 | BUG_ON(unlikely(region->vm_top < region->vm_end)); |
470 | BUG_ON(unlikely(region->vm_start < last->vm_top)); | 517 | BUG_ON(unlikely(region->vm_start < last->vm_top)); |
471 | 518 | ||
472 | lastp = p; | 519 | lastp = p; |
473 | } | 520 | } |
474 | } | 521 | } |
475 | #else | 522 | #else |
476 | static void validate_nommu_regions(void) | 523 | static void validate_nommu_regions(void) |
477 | { | 524 | { |
478 | } | 525 | } |
479 | #endif | 526 | #endif |
480 | 527 | ||
481 | /* | 528 | /* |
482 | * add a region into the global tree | 529 | * add a region into the global tree |
483 | */ | 530 | */ |
484 | static void add_nommu_region(struct vm_region *region) | 531 | static void add_nommu_region(struct vm_region *region) |
485 | { | 532 | { |
486 | struct vm_region *pregion; | 533 | struct vm_region *pregion; |
487 | struct rb_node **p, *parent; | 534 | struct rb_node **p, *parent; |
488 | 535 | ||
489 | validate_nommu_regions(); | 536 | validate_nommu_regions(); |
490 | 537 | ||
491 | parent = NULL; | 538 | parent = NULL; |
492 | p = &nommu_region_tree.rb_node; | 539 | p = &nommu_region_tree.rb_node; |
493 | while (*p) { | 540 | while (*p) { |
494 | parent = *p; | 541 | parent = *p; |
495 | pregion = rb_entry(parent, struct vm_region, vm_rb); | 542 | pregion = rb_entry(parent, struct vm_region, vm_rb); |
496 | if (region->vm_start < pregion->vm_start) | 543 | if (region->vm_start < pregion->vm_start) |
497 | p = &(*p)->rb_left; | 544 | p = &(*p)->rb_left; |
498 | else if (region->vm_start > pregion->vm_start) | 545 | else if (region->vm_start > pregion->vm_start) |
499 | p = &(*p)->rb_right; | 546 | p = &(*p)->rb_right; |
500 | else if (pregion == region) | 547 | else if (pregion == region) |
501 | return; | 548 | return; |
502 | else | 549 | else |
503 | BUG(); | 550 | BUG(); |
504 | } | 551 | } |
505 | 552 | ||
506 | rb_link_node(®ion->vm_rb, parent, p); | 553 | rb_link_node(®ion->vm_rb, parent, p); |
507 | rb_insert_color(®ion->vm_rb, &nommu_region_tree); | 554 | rb_insert_color(®ion->vm_rb, &nommu_region_tree); |
508 | 555 | ||
509 | validate_nommu_regions(); | 556 | validate_nommu_regions(); |
510 | } | 557 | } |
511 | 558 | ||
512 | /* | 559 | /* |
513 | * delete a region from the global tree | 560 | * delete a region from the global tree |
514 | */ | 561 | */ |
515 | static void delete_nommu_region(struct vm_region *region) | 562 | static void delete_nommu_region(struct vm_region *region) |
516 | { | 563 | { |
517 | BUG_ON(!nommu_region_tree.rb_node); | 564 | BUG_ON(!nommu_region_tree.rb_node); |
518 | 565 | ||
519 | validate_nommu_regions(); | 566 | validate_nommu_regions(); |
520 | rb_erase(®ion->vm_rb, &nommu_region_tree); | 567 | rb_erase(®ion->vm_rb, &nommu_region_tree); |
521 | validate_nommu_regions(); | 568 | validate_nommu_regions(); |
522 | } | 569 | } |
523 | 570 | ||
524 | /* | 571 | /* |
525 | * free a contiguous series of pages | 572 | * free a contiguous series of pages |
526 | */ | 573 | */ |
527 | static void free_page_series(unsigned long from, unsigned long to) | 574 | static void free_page_series(unsigned long from, unsigned long to) |
528 | { | 575 | { |
529 | for (; from < to; from += PAGE_SIZE) { | 576 | for (; from < to; from += PAGE_SIZE) { |
530 | struct page *page = virt_to_page(from); | 577 | struct page *page = virt_to_page(from); |
531 | 578 | ||
532 | kdebug("- free %lx", from); | 579 | kdebug("- free %lx", from); |
533 | atomic_long_dec(&mmap_pages_allocated); | 580 | atomic_long_dec(&mmap_pages_allocated); |
534 | if (page_count(page) != 1) | 581 | if (page_count(page) != 1) |
535 | kdebug("free page %p: refcount not one: %d", | 582 | kdebug("free page %p: refcount not one: %d", |
536 | page, page_count(page)); | 583 | page, page_count(page)); |
537 | put_page(page); | 584 | put_page(page); |
538 | } | 585 | } |
539 | } | 586 | } |
540 | 587 | ||
541 | /* | 588 | /* |
542 | * release a reference to a region | 589 | * release a reference to a region |
543 | * - the caller must hold the region semaphore for writing, which this releases | 590 | * - the caller must hold the region semaphore for writing, which this releases |
544 | * - the region may not have been added to the tree yet, in which case vm_top | 591 | * - the region may not have been added to the tree yet, in which case vm_top |
545 | * will equal vm_start | 592 | * will equal vm_start |
546 | */ | 593 | */ |
547 | static void __put_nommu_region(struct vm_region *region) | 594 | static void __put_nommu_region(struct vm_region *region) |
548 | __releases(nommu_region_sem) | 595 | __releases(nommu_region_sem) |
549 | { | 596 | { |
550 | kenter("%p{%d}", region, region->vm_usage); | 597 | kenter("%p{%d}", region, region->vm_usage); |
551 | 598 | ||
552 | BUG_ON(!nommu_region_tree.rb_node); | 599 | BUG_ON(!nommu_region_tree.rb_node); |
553 | 600 | ||
554 | if (--region->vm_usage == 0) { | 601 | if (--region->vm_usage == 0) { |
555 | if (region->vm_top > region->vm_start) | 602 | if (region->vm_top > region->vm_start) |
556 | delete_nommu_region(region); | 603 | delete_nommu_region(region); |
557 | up_write(&nommu_region_sem); | 604 | up_write(&nommu_region_sem); |
558 | 605 | ||
559 | if (region->vm_file) | 606 | if (region->vm_file) |
560 | fput(region->vm_file); | 607 | fput(region->vm_file); |
561 | 608 | ||
562 | /* IO memory and memory shared directly out of the pagecache | 609 | /* IO memory and memory shared directly out of the pagecache |
563 | * from ramfs/tmpfs mustn't be released here */ | 610 | * from ramfs/tmpfs mustn't be released here */ |
564 | if (region->vm_flags & VM_MAPPED_COPY) { | 611 | if (region->vm_flags & VM_MAPPED_COPY) { |
565 | kdebug("free series"); | 612 | kdebug("free series"); |
566 | free_page_series(region->vm_start, region->vm_top); | 613 | free_page_series(region->vm_start, region->vm_top); |
567 | } | 614 | } |
568 | kmem_cache_free(vm_region_jar, region); | 615 | kmem_cache_free(vm_region_jar, region); |
569 | } else { | 616 | } else { |
570 | up_write(&nommu_region_sem); | 617 | up_write(&nommu_region_sem); |
571 | } | 618 | } |
572 | } | 619 | } |
573 | 620 | ||
574 | /* | 621 | /* |
575 | * release a reference to a region | 622 | * release a reference to a region |
576 | */ | 623 | */ |
577 | static void put_nommu_region(struct vm_region *region) | 624 | static void put_nommu_region(struct vm_region *region) |
578 | { | 625 | { |
579 | down_write(&nommu_region_sem); | 626 | down_write(&nommu_region_sem); |
580 | __put_nommu_region(region); | 627 | __put_nommu_region(region); |
581 | } | 628 | } |
582 | 629 | ||
583 | /* | 630 | /* |
584 | * update protection on a vma | 631 | * update protection on a vma |
585 | */ | 632 | */ |
586 | static void protect_vma(struct vm_area_struct *vma, unsigned long flags) | 633 | static void protect_vma(struct vm_area_struct *vma, unsigned long flags) |
587 | { | 634 | { |
588 | #ifdef CONFIG_MPU | 635 | #ifdef CONFIG_MPU |
589 | struct mm_struct *mm = vma->vm_mm; | 636 | struct mm_struct *mm = vma->vm_mm; |
590 | long start = vma->vm_start & PAGE_MASK; | 637 | long start = vma->vm_start & PAGE_MASK; |
591 | while (start < vma->vm_end) { | 638 | while (start < vma->vm_end) { |
592 | protect_page(mm, start, flags); | 639 | protect_page(mm, start, flags); |
593 | start += PAGE_SIZE; | 640 | start += PAGE_SIZE; |
594 | } | 641 | } |
595 | update_protections(mm); | 642 | update_protections(mm); |
596 | #endif | 643 | #endif |
597 | } | 644 | } |
598 | 645 | ||
599 | /* | 646 | /* |
600 | * add a VMA into a process's mm_struct in the appropriate place in the list | 647 | * add a VMA into a process's mm_struct in the appropriate place in the list |
601 | * and tree and add to the address space's page tree also if not an anonymous | 648 | * and tree and add to the address space's page tree also if not an anonymous |
602 | * page | 649 | * page |
603 | * - should be called with mm->mmap_sem held writelocked | 650 | * - should be called with mm->mmap_sem held writelocked |
604 | */ | 651 | */ |
605 | static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) | 652 | static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) |
606 | { | 653 | { |
607 | struct vm_area_struct *pvma, **pp, *next; | 654 | struct vm_area_struct *pvma, **pp, *next; |
608 | struct address_space *mapping; | 655 | struct address_space *mapping; |
609 | struct rb_node **p, *parent; | 656 | struct rb_node **p, *parent; |
610 | 657 | ||
611 | kenter(",%p", vma); | 658 | kenter(",%p", vma); |
612 | 659 | ||
613 | BUG_ON(!vma->vm_region); | 660 | BUG_ON(!vma->vm_region); |
614 | 661 | ||
615 | mm->map_count++; | 662 | mm->map_count++; |
616 | vma->vm_mm = mm; | 663 | vma->vm_mm = mm; |
617 | 664 | ||
618 | protect_vma(vma, vma->vm_flags); | 665 | protect_vma(vma, vma->vm_flags); |
619 | 666 | ||
620 | /* add the VMA to the mapping */ | 667 | /* add the VMA to the mapping */ |
621 | if (vma->vm_file) { | 668 | if (vma->vm_file) { |
622 | mapping = vma->vm_file->f_mapping; | 669 | mapping = vma->vm_file->f_mapping; |
623 | 670 | ||
624 | flush_dcache_mmap_lock(mapping); | 671 | flush_dcache_mmap_lock(mapping); |
625 | vma_prio_tree_insert(vma, &mapping->i_mmap); | 672 | vma_prio_tree_insert(vma, &mapping->i_mmap); |
626 | flush_dcache_mmap_unlock(mapping); | 673 | flush_dcache_mmap_unlock(mapping); |
627 | } | 674 | } |
628 | 675 | ||
629 | /* add the VMA to the tree */ | 676 | /* add the VMA to the tree */ |
630 | parent = NULL; | 677 | parent = NULL; |
631 | p = &mm->mm_rb.rb_node; | 678 | p = &mm->mm_rb.rb_node; |
632 | while (*p) { | 679 | while (*p) { |
633 | parent = *p; | 680 | parent = *p; |
634 | pvma = rb_entry(parent, struct vm_area_struct, vm_rb); | 681 | pvma = rb_entry(parent, struct vm_area_struct, vm_rb); |
635 | 682 | ||
636 | /* sort by: start addr, end addr, VMA struct addr in that order | 683 | /* sort by: start addr, end addr, VMA struct addr in that order |
637 | * (the latter is necessary as we may get identical VMAs) */ | 684 | * (the latter is necessary as we may get identical VMAs) */ |
638 | if (vma->vm_start < pvma->vm_start) | 685 | if (vma->vm_start < pvma->vm_start) |
639 | p = &(*p)->rb_left; | 686 | p = &(*p)->rb_left; |
640 | else if (vma->vm_start > pvma->vm_start) | 687 | else if (vma->vm_start > pvma->vm_start) |
641 | p = &(*p)->rb_right; | 688 | p = &(*p)->rb_right; |
642 | else if (vma->vm_end < pvma->vm_end) | 689 | else if (vma->vm_end < pvma->vm_end) |
643 | p = &(*p)->rb_left; | 690 | p = &(*p)->rb_left; |
644 | else if (vma->vm_end > pvma->vm_end) | 691 | else if (vma->vm_end > pvma->vm_end) |
645 | p = &(*p)->rb_right; | 692 | p = &(*p)->rb_right; |
646 | else if (vma < pvma) | 693 | else if (vma < pvma) |
647 | p = &(*p)->rb_left; | 694 | p = &(*p)->rb_left; |
648 | else if (vma > pvma) | 695 | else if (vma > pvma) |
649 | p = &(*p)->rb_right; | 696 | p = &(*p)->rb_right; |
650 | else | 697 | else |
651 | BUG(); | 698 | BUG(); |
652 | } | 699 | } |
653 | 700 | ||
654 | rb_link_node(&vma->vm_rb, parent, p); | 701 | rb_link_node(&vma->vm_rb, parent, p); |
655 | rb_insert_color(&vma->vm_rb, &mm->mm_rb); | 702 | rb_insert_color(&vma->vm_rb, &mm->mm_rb); |
656 | 703 | ||
657 | /* add VMA to the VMA list also */ | 704 | /* add VMA to the VMA list also */ |
658 | for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) { | 705 | for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) { |
659 | if (pvma->vm_start > vma->vm_start) | 706 | if (pvma->vm_start > vma->vm_start) |
660 | break; | 707 | break; |
661 | if (pvma->vm_start < vma->vm_start) | 708 | if (pvma->vm_start < vma->vm_start) |
662 | continue; | 709 | continue; |
663 | if (pvma->vm_end < vma->vm_end) | 710 | if (pvma->vm_end < vma->vm_end) |
664 | break; | 711 | break; |
665 | } | 712 | } |
666 | 713 | ||
667 | next = *pp; | 714 | next = *pp; |
668 | *pp = vma; | 715 | *pp = vma; |
669 | vma->vm_next = next; | 716 | vma->vm_next = next; |
670 | if (next) | 717 | if (next) |
671 | next->vm_prev = vma; | 718 | next->vm_prev = vma; |
672 | } | 719 | } |
673 | 720 | ||
674 | /* | 721 | /* |
675 | * delete a VMA from its owning mm_struct and address space | 722 | * delete a VMA from its owning mm_struct and address space |
676 | */ | 723 | */ |
677 | static void delete_vma_from_mm(struct vm_area_struct *vma) | 724 | static void delete_vma_from_mm(struct vm_area_struct *vma) |
678 | { | 725 | { |
679 | struct vm_area_struct **pp; | 726 | struct vm_area_struct **pp; |
680 | struct address_space *mapping; | 727 | struct address_space *mapping; |
681 | struct mm_struct *mm = vma->vm_mm; | 728 | struct mm_struct *mm = vma->vm_mm; |
682 | 729 | ||
683 | kenter("%p", vma); | 730 | kenter("%p", vma); |
684 | 731 | ||
685 | protect_vma(vma, 0); | 732 | protect_vma(vma, 0); |
686 | 733 | ||
687 | mm->map_count--; | 734 | mm->map_count--; |
688 | if (mm->mmap_cache == vma) | 735 | if (mm->mmap_cache == vma) |
689 | mm->mmap_cache = NULL; | 736 | mm->mmap_cache = NULL; |
690 | 737 | ||
691 | /* remove the VMA from the mapping */ | 738 | /* remove the VMA from the mapping */ |
692 | if (vma->vm_file) { | 739 | if (vma->vm_file) { |
693 | mapping = vma->vm_file->f_mapping; | 740 | mapping = vma->vm_file->f_mapping; |
694 | 741 | ||
695 | flush_dcache_mmap_lock(mapping); | 742 | flush_dcache_mmap_lock(mapping); |
696 | vma_prio_tree_remove(vma, &mapping->i_mmap); | 743 | vma_prio_tree_remove(vma, &mapping->i_mmap); |
697 | flush_dcache_mmap_unlock(mapping); | 744 | flush_dcache_mmap_unlock(mapping); |
698 | } | 745 | } |
699 | 746 | ||
700 | /* remove from the MM's tree and list */ | 747 | /* remove from the MM's tree and list */ |
701 | rb_erase(&vma->vm_rb, &mm->mm_rb); | 748 | rb_erase(&vma->vm_rb, &mm->mm_rb); |
702 | for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) { | 749 | for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) { |
703 | if (*pp == vma) { | 750 | if (*pp == vma) { |
704 | *pp = vma->vm_next; | 751 | *pp = vma->vm_next; |
705 | break; | 752 | break; |
706 | } | 753 | } |
707 | } | 754 | } |
708 | 755 | ||
709 | vma->vm_mm = NULL; | 756 | vma->vm_mm = NULL; |
710 | } | 757 | } |
711 | 758 | ||
712 | /* | 759 | /* |
713 | * destroy a VMA record | 760 | * destroy a VMA record |
714 | */ | 761 | */ |
715 | static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) | 762 | static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) |
716 | { | 763 | { |
717 | kenter("%p", vma); | 764 | kenter("%p", vma); |
718 | if (vma->vm_ops && vma->vm_ops->close) | 765 | if (vma->vm_ops && vma->vm_ops->close) |
719 | vma->vm_ops->close(vma); | 766 | vma->vm_ops->close(vma); |
720 | if (vma->vm_file) { | 767 | if (vma->vm_file) { |
721 | fput(vma->vm_file); | 768 | fput(vma->vm_file); |
722 | if (vma->vm_flags & VM_EXECUTABLE) | 769 | if (vma->vm_flags & VM_EXECUTABLE) |
723 | removed_exe_file_vma(mm); | 770 | removed_exe_file_vma(mm); |
724 | } | 771 | } |
725 | put_nommu_region(vma->vm_region); | 772 | put_nommu_region(vma->vm_region); |
726 | kmem_cache_free(vm_area_cachep, vma); | 773 | kmem_cache_free(vm_area_cachep, vma); |
727 | } | 774 | } |
728 | 775 | ||
729 | /* | 776 | /* |
730 | * look up the first VMA in which addr resides, NULL if none | 777 | * look up the first VMA in which addr resides, NULL if none |
731 | * - should be called with mm->mmap_sem at least held readlocked | 778 | * - should be called with mm->mmap_sem at least held readlocked |
732 | */ | 779 | */ |
733 | struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) | 780 | struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) |
734 | { | 781 | { |
735 | struct vm_area_struct *vma; | 782 | struct vm_area_struct *vma; |
736 | struct rb_node *n = mm->mm_rb.rb_node; | 783 | struct rb_node *n = mm->mm_rb.rb_node; |
737 | 784 | ||
738 | /* check the cache first */ | 785 | /* check the cache first */ |
739 | vma = mm->mmap_cache; | 786 | vma = mm->mmap_cache; |
740 | if (vma && vma->vm_start <= addr && vma->vm_end > addr) | 787 | if (vma && vma->vm_start <= addr && vma->vm_end > addr) |
741 | return vma; | 788 | return vma; |
742 | 789 | ||
743 | /* trawl the tree (there may be multiple mappings in which addr | 790 | /* trawl the tree (there may be multiple mappings in which addr |
744 | * resides) */ | 791 | * resides) */ |
745 | for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { | 792 | for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { |
746 | vma = rb_entry(n, struct vm_area_struct, vm_rb); | 793 | vma = rb_entry(n, struct vm_area_struct, vm_rb); |
747 | if (vma->vm_start > addr) | 794 | if (vma->vm_start > addr) |
748 | return NULL; | 795 | return NULL; |
749 | if (vma->vm_end > addr) { | 796 | if (vma->vm_end > addr) { |
750 | mm->mmap_cache = vma; | 797 | mm->mmap_cache = vma; |
751 | return vma; | 798 | return vma; |
752 | } | 799 | } |
753 | } | 800 | } |
754 | 801 | ||
755 | return NULL; | 802 | return NULL; |
756 | } | 803 | } |
757 | EXPORT_SYMBOL(find_vma); | 804 | EXPORT_SYMBOL(find_vma); |
758 | 805 | ||
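find_vma() above first consults the one-entry mm->mmap_cache and only then walks the rb-tree in ascending address order. A minimal, self-contained sketch of the same cache-then-scan pattern, using a hypothetical sorted-array container ("interval"/"interval_set" are made-up names, not kernel structures):

#include <stddef.h>

/* hypothetical stand-in for a VMA: a half-open [start, end) interval */
struct interval {
        unsigned long start;
        unsigned long end;
};

struct interval_set {
        struct interval *items;         /* sorted by ascending start */
        size_t nr;
        struct interval *cache;         /* last successful lookup */
};

/* find the first interval containing addr; NULL if none */
static struct interval *interval_find(struct interval_set *set, unsigned long addr)
{
        size_t i;

        /* check the cache first, as find_vma() does with mm->mmap_cache */
        if (set->cache && set->cache->start <= addr && set->cache->end > addr)
                return set->cache;

        /* otherwise scan in ascending start order */
        for (i = 0; i < set->nr; i++) {
                struct interval *iv = &set->items[i];

                if (iv->start > addr)
                        return NULL;    /* later entries start even higher */
                if (iv->end > addr) {
                        set->cache = iv;
                        return iv;
                }
        }
        return NULL;
}

The one-entry cache pays off because successive lookups tend to hit the same VMA; the scan stays correct because entries are visited in ascending start order, so the first entry whose start exceeds addr proves there is no match.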
759 | /* | 806 | /* |
760 | * find a VMA | 807 | * find a VMA |
761 | * - we don't extend stack VMAs under NOMMU conditions | 808 | * - we don't extend stack VMAs under NOMMU conditions |
762 | */ | 809 | */ |
763 | struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) | 810 | struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) |
764 | { | 811 | { |
765 | return find_vma(mm, addr); | 812 | return find_vma(mm, addr); |
766 | } | 813 | } |
767 | 814 | ||
768 | /* | 815 | /* |
769 | * expand a stack to a given address | 816 | * expand a stack to a given address |
770 | * - not supported under NOMMU conditions | 817 | * - not supported under NOMMU conditions |
771 | */ | 818 | */ |
772 | int expand_stack(struct vm_area_struct *vma, unsigned long address) | 819 | int expand_stack(struct vm_area_struct *vma, unsigned long address) |
773 | { | 820 | { |
774 | return -ENOMEM; | 821 | return -ENOMEM; |
775 | } | 822 | } |
776 | 823 | ||
777 | /* | 824 | /* |
778 | * look up the first VMA that exactly matches addr | 825 | * look up the first VMA that exactly matches addr |
779 | * - should be called with mm->mmap_sem at least held readlocked | 826 | * - should be called with mm->mmap_sem at least held readlocked |
780 | */ | 827 | */ |
781 | static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, | 828 | static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, |
782 | unsigned long addr, | 829 | unsigned long addr, |
783 | unsigned long len) | 830 | unsigned long len) |
784 | { | 831 | { |
785 | struct vm_area_struct *vma; | 832 | struct vm_area_struct *vma; |
786 | struct rb_node *n = mm->mm_rb.rb_node; | 833 | struct rb_node *n = mm->mm_rb.rb_node; |
787 | unsigned long end = addr + len; | 834 | unsigned long end = addr + len; |
788 | 835 | ||
789 | /* check the cache first */ | 836 | /* check the cache first */ |
790 | vma = mm->mmap_cache; | 837 | vma = mm->mmap_cache; |
791 | if (vma && vma->vm_start == addr && vma->vm_end == end) | 838 | if (vma && vma->vm_start == addr && vma->vm_end == end) |
792 | return vma; | 839 | return vma; |
793 | 840 | ||
794 | /* trawl the tree (there may be multiple mappings in which addr | 841 | /* trawl the tree (there may be multiple mappings in which addr |
795 | * resides) */ | 842 | * resides) */ |
796 | for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { | 843 | for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { |
797 | vma = rb_entry(n, struct vm_area_struct, vm_rb); | 844 | vma = rb_entry(n, struct vm_area_struct, vm_rb); |
798 | if (vma->vm_start < addr) | 845 | if (vma->vm_start < addr) |
799 | continue; | 846 | continue; |
800 | if (vma->vm_start > addr) | 847 | if (vma->vm_start > addr) |
801 | return NULL; | 848 | return NULL; |
802 | if (vma->vm_end == end) { | 849 | if (vma->vm_end == end) { |
803 | mm->mmap_cache = vma; | 850 | mm->mmap_cache = vma; |
804 | return vma; | 851 | return vma; |
805 | } | 852 | } |
806 | } | 853 | } |
807 | 854 | ||
808 | return NULL; | 855 | return NULL; |
809 | } | 856 | } |
810 | 857 | ||
811 | /* | 858 | /* |
812 | * determine whether a mapping should be permitted and, if so, what sort of | 859 | * determine whether a mapping should be permitted and, if so, what sort of |
813 | * mapping we're capable of supporting | 860 | * mapping we're capable of supporting |
814 | */ | 861 | */ |
815 | static int validate_mmap_request(struct file *file, | 862 | static int validate_mmap_request(struct file *file, |
816 | unsigned long addr, | 863 | unsigned long addr, |
817 | unsigned long len, | 864 | unsigned long len, |
818 | unsigned long prot, | 865 | unsigned long prot, |
819 | unsigned long flags, | 866 | unsigned long flags, |
820 | unsigned long pgoff, | 867 | unsigned long pgoff, |
821 | unsigned long *_capabilities) | 868 | unsigned long *_capabilities) |
822 | { | 869 | { |
823 | unsigned long capabilities, rlen; | 870 | unsigned long capabilities, rlen; |
824 | unsigned long reqprot = prot; | 871 | unsigned long reqprot = prot; |
825 | int ret; | 872 | int ret; |
826 | 873 | ||
827 | /* do the simple checks first */ | 874 | /* do the simple checks first */ |
828 | if (flags & MAP_FIXED) { | 875 | if (flags & MAP_FIXED) { |
829 | printk(KERN_DEBUG | 876 | printk(KERN_DEBUG |
830 | "%d: Can't do fixed-address/overlay mmap of RAM\n", | 877 | "%d: Can't do fixed-address/overlay mmap of RAM\n", |
831 | current->pid); | 878 | current->pid); |
832 | return -EINVAL; | 879 | return -EINVAL; |
833 | } | 880 | } |
834 | 881 | ||
835 | if ((flags & MAP_TYPE) != MAP_PRIVATE && | 882 | if ((flags & MAP_TYPE) != MAP_PRIVATE && |
836 | (flags & MAP_TYPE) != MAP_SHARED) | 883 | (flags & MAP_TYPE) != MAP_SHARED) |
837 | return -EINVAL; | 884 | return -EINVAL; |
838 | 885 | ||
839 | if (!len) | 886 | if (!len) |
840 | return -EINVAL; | 887 | return -EINVAL; |
841 | 888 | ||
842 | /* Careful about overflows.. */ | 889 | /* Careful about overflows.. */ |
843 | rlen = PAGE_ALIGN(len); | 890 | rlen = PAGE_ALIGN(len); |
844 | if (!rlen || rlen > TASK_SIZE) | 891 | if (!rlen || rlen > TASK_SIZE) |
845 | return -ENOMEM; | 892 | return -ENOMEM; |
846 | 893 | ||
847 | /* offset overflow? */ | 894 | /* offset overflow? */ |
848 | if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) | 895 | if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff) |
849 | return -EOVERFLOW; | 896 | return -EOVERFLOW; |
850 | 897 | ||
851 | if (file) { | 898 | if (file) { |
852 | /* validate file mapping requests */ | 899 | /* validate file mapping requests */ |
853 | struct address_space *mapping; | 900 | struct address_space *mapping; |
854 | 901 | ||
855 | /* files must support mmap */ | 902 | /* files must support mmap */ |
856 | if (!file->f_op || !file->f_op->mmap) | 903 | if (!file->f_op || !file->f_op->mmap) |
857 | return -ENODEV; | 904 | return -ENODEV; |
858 | 905 | ||
859 | /* work out if what we've got could possibly be shared | 906 | /* work out if what we've got could possibly be shared |
860 | * - we support chardevs that provide their own "memory" | 907 | * - we support chardevs that provide their own "memory" |
861 | * - we support files/blockdevs that are memory backed | 908 | * - we support files/blockdevs that are memory backed |
862 | */ | 909 | */ |
863 | mapping = file->f_mapping; | 910 | mapping = file->f_mapping; |
864 | if (!mapping) | 911 | if (!mapping) |
865 | mapping = file->f_path.dentry->d_inode->i_mapping; | 912 | mapping = file->f_path.dentry->d_inode->i_mapping; |
866 | 913 | ||
867 | capabilities = 0; | 914 | capabilities = 0; |
868 | if (mapping && mapping->backing_dev_info) | 915 | if (mapping && mapping->backing_dev_info) |
869 | capabilities = mapping->backing_dev_info->capabilities; | 916 | capabilities = mapping->backing_dev_info->capabilities; |
870 | 917 | ||
871 | if (!capabilities) { | 918 | if (!capabilities) { |
872 | /* no explicit capabilities set, so assume some | 919 | /* no explicit capabilities set, so assume some |
873 | * defaults */ | 920 | * defaults */ |
874 | switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) { | 921 | switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) { |
875 | case S_IFREG: | 922 | case S_IFREG: |
876 | case S_IFBLK: | 923 | case S_IFBLK: |
877 | capabilities = BDI_CAP_MAP_COPY; | 924 | capabilities = BDI_CAP_MAP_COPY; |
878 | break; | 925 | break; |
879 | 926 | ||
880 | case S_IFCHR: | 927 | case S_IFCHR: |
881 | capabilities = | 928 | capabilities = |
882 | BDI_CAP_MAP_DIRECT | | 929 | BDI_CAP_MAP_DIRECT | |
883 | BDI_CAP_READ_MAP | | 930 | BDI_CAP_READ_MAP | |
884 | BDI_CAP_WRITE_MAP; | 931 | BDI_CAP_WRITE_MAP; |
885 | break; | 932 | break; |
886 | 933 | ||
887 | default: | 934 | default: |
888 | return -EINVAL; | 935 | return -EINVAL; |
889 | } | 936 | } |
890 | } | 937 | } |
891 | 938 | ||
892 | /* eliminate any capabilities that we can't support on this | 939 | /* eliminate any capabilities that we can't support on this |
893 | * device */ | 940 | * device */ |
894 | if (!file->f_op->get_unmapped_area) | 941 | if (!file->f_op->get_unmapped_area) |
895 | capabilities &= ~BDI_CAP_MAP_DIRECT; | 942 | capabilities &= ~BDI_CAP_MAP_DIRECT; |
896 | if (!file->f_op->read) | 943 | if (!file->f_op->read) |
897 | capabilities &= ~BDI_CAP_MAP_COPY; | 944 | capabilities &= ~BDI_CAP_MAP_COPY; |
898 | 945 | ||
899 | /* The file shall have been opened with read permission. */ | 946 | /* The file shall have been opened with read permission. */ |
900 | if (!(file->f_mode & FMODE_READ)) | 947 | if (!(file->f_mode & FMODE_READ)) |
901 | return -EACCES; | 948 | return -EACCES; |
902 | 949 | ||
903 | if (flags & MAP_SHARED) { | 950 | if (flags & MAP_SHARED) { |
904 | /* do checks for writing, appending and locking */ | 951 | /* do checks for writing, appending and locking */ |
905 | if ((prot & PROT_WRITE) && | 952 | if ((prot & PROT_WRITE) && |
906 | !(file->f_mode & FMODE_WRITE)) | 953 | !(file->f_mode & FMODE_WRITE)) |
907 | return -EACCES; | 954 | return -EACCES; |
908 | 955 | ||
909 | if (IS_APPEND(file->f_path.dentry->d_inode) && | 956 | if (IS_APPEND(file->f_path.dentry->d_inode) && |
910 | (file->f_mode & FMODE_WRITE)) | 957 | (file->f_mode & FMODE_WRITE)) |
911 | return -EACCES; | 958 | return -EACCES; |
912 | 959 | ||
913 | if (locks_verify_locked(file->f_path.dentry->d_inode)) | 960 | if (locks_verify_locked(file->f_path.dentry->d_inode)) |
914 | return -EAGAIN; | 961 | return -EAGAIN; |
915 | 962 | ||
916 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) | 963 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) |
917 | return -ENODEV; | 964 | return -ENODEV; |
918 | 965 | ||
919 | /* we mustn't privatise shared mappings */ | 966 | /* we mustn't privatise shared mappings */ |
920 | capabilities &= ~BDI_CAP_MAP_COPY; | 967 | capabilities &= ~BDI_CAP_MAP_COPY; |
921 | } | 968 | } |
922 | else { | 969 | else { |
923 | /* we're going to read the file into private memory we | 970 | /* we're going to read the file into private memory we |
924 | * allocate */ | 971 | * allocate */ |
925 | if (!(capabilities & BDI_CAP_MAP_COPY)) | 972 | if (!(capabilities & BDI_CAP_MAP_COPY)) |
926 | return -ENODEV; | 973 | return -ENODEV; |
927 | 974 | ||
928 | /* we don't permit a private writable mapping to be | 975 | /* we don't permit a private writable mapping to be |
929 | * shared with the backing device */ | 976 | * shared with the backing device */ |
930 | if (prot & PROT_WRITE) | 977 | if (prot & PROT_WRITE) |
931 | capabilities &= ~BDI_CAP_MAP_DIRECT; | 978 | capabilities &= ~BDI_CAP_MAP_DIRECT; |
932 | } | 979 | } |
933 | 980 | ||
934 | if (capabilities & BDI_CAP_MAP_DIRECT) { | 981 | if (capabilities & BDI_CAP_MAP_DIRECT) { |
935 | if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) || | 982 | if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) || |
936 | ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) || | 983 | ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) || |
937 | ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP)) | 984 | ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP)) |
938 | ) { | 985 | ) { |
939 | capabilities &= ~BDI_CAP_MAP_DIRECT; | 986 | capabilities &= ~BDI_CAP_MAP_DIRECT; |
940 | if (flags & MAP_SHARED) { | 987 | if (flags & MAP_SHARED) { |
941 | printk(KERN_WARNING | 988 | printk(KERN_WARNING |
942 | "MAP_SHARED not completely supported on !MMU\n"); | 989 | "MAP_SHARED not completely supported on !MMU\n"); |
943 | return -EINVAL; | 990 | return -EINVAL; |
944 | } | 991 | } |
945 | } | 992 | } |
946 | } | 993 | } |
947 | 994 | ||
948 | /* handle executable mappings and implied executable | 995 | /* handle executable mappings and implied executable |
949 | * mappings */ | 996 | * mappings */ |
950 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { | 997 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { |
951 | if (prot & PROT_EXEC) | 998 | if (prot & PROT_EXEC) |
952 | return -EPERM; | 999 | return -EPERM; |
953 | } | 1000 | } |
954 | else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { | 1001 | else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { |
955 | /* handle implication of PROT_EXEC by PROT_READ */ | 1002 | /* handle implication of PROT_EXEC by PROT_READ */ |
956 | if (current->personality & READ_IMPLIES_EXEC) { | 1003 | if (current->personality & READ_IMPLIES_EXEC) { |
957 | if (capabilities & BDI_CAP_EXEC_MAP) | 1004 | if (capabilities & BDI_CAP_EXEC_MAP) |
958 | prot |= PROT_EXEC; | 1005 | prot |= PROT_EXEC; |
959 | } | 1006 | } |
960 | } | 1007 | } |
961 | else if ((prot & PROT_READ) && | 1008 | else if ((prot & PROT_READ) && |
962 | (prot & PROT_EXEC) && | 1009 | (prot & PROT_EXEC) && |
963 | !(capabilities & BDI_CAP_EXEC_MAP) | 1010 | !(capabilities & BDI_CAP_EXEC_MAP) |
964 | ) { | 1011 | ) { |
965 | /* backing file is not executable, try to copy */ | 1012 | /* backing file is not executable, try to copy */ |
966 | capabilities &= ~BDI_CAP_MAP_DIRECT; | 1013 | capabilities &= ~BDI_CAP_MAP_DIRECT; |
967 | } | 1014 | } |
968 | } | 1015 | } |
969 | else { | 1016 | else { |
970 | /* anonymous mappings are always memory backed and can be | 1017 | /* anonymous mappings are always memory backed and can be |
971 | * privately mapped | 1018 | * privately mapped |
972 | */ | 1019 | */ |
973 | capabilities = BDI_CAP_MAP_COPY; | 1020 | capabilities = BDI_CAP_MAP_COPY; |
974 | 1021 | ||
975 | /* handle PROT_EXEC implication by PROT_READ */ | 1022 | /* handle PROT_EXEC implication by PROT_READ */ |
976 | if ((prot & PROT_READ) && | 1023 | if ((prot & PROT_READ) && |
977 | (current->personality & READ_IMPLIES_EXEC)) | 1024 | (current->personality & READ_IMPLIES_EXEC)) |
978 | prot |= PROT_EXEC; | 1025 | prot |= PROT_EXEC; |
979 | } | 1026 | } |
980 | 1027 | ||
981 | /* allow the security API to have its say */ | 1028 | /* allow the security API to have its say */ |
982 | ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); | 1029 | ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); |
983 | if (ret < 0) | 1030 | if (ret < 0) |
984 | return ret; | 1031 | return ret; |
985 | 1032 | ||
986 | /* looks okay */ | 1033 | /* looks okay */ |
987 | *_capabilities = capabilities; | 1034 | *_capabilities = capabilities; |
988 | return 0; | 1035 | return 0; |
989 | } | 1036 | } |
990 | 1037 | ||
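Two of the early checks in validate_mmap_request() guard against integer wrap-around: PAGE_ALIGN() of a length near ULONG_MAX wraps to zero, and a large page offset plus the request's page count can wrap past zero as well. A small standalone illustration of both conditions (userspace C with assumed 4 KiB pages, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long len = ~0UL - 100;          /* absurdly large request */
        unsigned long rlen = PAGE_ALIGN(len);    /* wraps around to 0 */
        unsigned long pgoff = ~0UL - 1;          /* file offset near the top */
        unsigned long pages = PAGE_ALIGN(16 * PAGE_SIZE) >> PAGE_SHIFT;

        printf("rlen wrapped to 0 -> rejected with -ENOMEM: %d\n", rlen == 0);
        printf("pgoff + pages wrapped -> rejected with -EOVERFLOW: %d\n",
               (pgoff + pages) < pgoff);
        return 0;
}

Both printfs report 1, which is exactly the situation the "!rlen" and "(pgoff + (rlen >> PAGE_SHIFT)) < pgoff" tests above reject before any allocation happens.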
991 | /* | 1038 | /* |
992 | * we've determined that we can make the mapping, now translate what we | 1039 | * we've determined that we can make the mapping, now translate what we |
993 | * now know into VMA flags | 1040 | * now know into VMA flags |
994 | */ | 1041 | */ |
995 | static unsigned long determine_vm_flags(struct file *file, | 1042 | static unsigned long determine_vm_flags(struct file *file, |
996 | unsigned long prot, | 1043 | unsigned long prot, |
997 | unsigned long flags, | 1044 | unsigned long flags, |
998 | unsigned long capabilities) | 1045 | unsigned long capabilities) |
999 | { | 1046 | { |
1000 | unsigned long vm_flags; | 1047 | unsigned long vm_flags; |
1001 | 1048 | ||
1002 | vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); | 1049 | vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); |
1003 | /* vm_flags |= mm->def_flags; */ | 1050 | /* vm_flags |= mm->def_flags; */ |
1004 | 1051 | ||
1005 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) { | 1052 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) { |
1006 | /* attempt to share read-only copies of mapped file chunks */ | 1053 | /* attempt to share read-only copies of mapped file chunks */ |
1007 | vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; | 1054 | vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; |
1008 | if (file && !(prot & PROT_WRITE)) | 1055 | if (file && !(prot & PROT_WRITE)) |
1009 | vm_flags |= VM_MAYSHARE; | 1056 | vm_flags |= VM_MAYSHARE; |
1010 | } else { | 1057 | } else { |
1011 | /* overlay a shareable mapping on the backing device or inode | 1058 | /* overlay a shareable mapping on the backing device or inode |
1012 | * if possible - used for chardevs, ramfs/tmpfs/shmfs and | 1059 | * if possible - used for chardevs, ramfs/tmpfs/shmfs and |
1013 | * romfs/cramfs */ | 1060 | * romfs/cramfs */ |
1014 | vm_flags |= VM_MAYSHARE | (capabilities & BDI_CAP_VMFLAGS); | 1061 | vm_flags |= VM_MAYSHARE | (capabilities & BDI_CAP_VMFLAGS); |
1015 | if (flags & MAP_SHARED) | 1062 | if (flags & MAP_SHARED) |
1016 | vm_flags |= VM_SHARED; | 1063 | vm_flags |= VM_SHARED; |
1017 | } | 1064 | } |
1018 | 1065 | ||
1019 | /* refuse to let anyone share private mappings with this process if | 1066 | /* refuse to let anyone share private mappings with this process if |
1020 | * it's being traced - otherwise breakpoints set in it may interfere | 1067 | * it's being traced - otherwise breakpoints set in it may interfere |
1021 | * with another untraced process | 1068 | * with another untraced process |
1022 | */ | 1069 | */ |
1023 | if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) | 1070 | if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) |
1024 | vm_flags &= ~VM_MAYSHARE; | 1071 | vm_flags &= ~VM_MAYSHARE; |
1025 | 1072 | ||
1026 | return vm_flags; | 1073 | return vm_flags; |
1027 | } | 1074 | } |
1028 | 1075 | ||
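For readers unfamiliar with calc_vm_prot_bits(), it translates each PROT_* bit into the corresponding VM_* bit before the capability-dependent adjustments above are applied. A simplified, hedged sketch of that translation (the VM_* values below are stand-ins defined for the example, not the kernel's definitions):

#include <stdio.h>
#include <sys/mman.h>

/* illustrative VM_* bits for this example only */
#define VM_READ  0x1
#define VM_WRITE 0x2
#define VM_EXEC  0x4

/* simplified analogue of calc_vm_prot_bits(): map PROT_* to VM_* */
static unsigned long prot_to_vm(unsigned long prot)
{
        unsigned long vm = 0;

        if (prot & PROT_READ)
                vm |= VM_READ;
        if (prot & PROT_WRITE)
                vm |= VM_WRITE;
        if (prot & PROT_EXEC)
                vm |= VM_EXEC;
        return vm;
}

int main(void)
{
        printf("%#lx\n", prot_to_vm(PROT_READ | PROT_WRITE));  /* prints 0x3 */
        return 0;
}

determine_vm_flags() then layers the BDI capability and MAP_SHARED/MAP_PRIVATE decisions on top of this base translation.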
1029 | /* | 1076 | /* |
1030 | * set up a shared mapping on a file (the driver or filesystem provides and | 1077 | * set up a shared mapping on a file (the driver or filesystem provides and |
1031 | * pins the storage) | 1078 | * pins the storage) |
1032 | */ | 1079 | */ |
1033 | static int do_mmap_shared_file(struct vm_area_struct *vma) | 1080 | static int do_mmap_shared_file(struct vm_area_struct *vma) |
1034 | { | 1081 | { |
1035 | int ret; | 1082 | int ret; |
1036 | 1083 | ||
1037 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | 1084 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
1038 | if (ret == 0) { | 1085 | if (ret == 0) { |
1039 | vma->vm_region->vm_top = vma->vm_region->vm_end; | 1086 | vma->vm_region->vm_top = vma->vm_region->vm_end; |
1040 | return 0; | 1087 | return 0; |
1041 | } | 1088 | } |
1042 | if (ret != -ENOSYS) | 1089 | if (ret != -ENOSYS) |
1043 | return ret; | 1090 | return ret; |
1044 | 1091 | ||
1045 | /* getting -ENOSYS indicates that direct mmap isn't possible (as | 1092 | /* getting -ENOSYS indicates that direct mmap isn't possible (as |
1046 | * opposed to tried but failed) so we can only give a suitable error as | 1093 | * opposed to tried but failed) so we can only give a suitable error as |
1047 | * it's not possible to make a private copy if MAP_SHARED was given */ | 1094 | * it's not possible to make a private copy if MAP_SHARED was given */ |
1048 | return -ENODEV; | 1095 | return -ENODEV; |
1049 | } | 1096 | } |
1050 | 1097 | ||
1051 | /* | 1098 | /* |
1052 | * set up a private mapping or an anonymous shared mapping | 1099 | * set up a private mapping or an anonymous shared mapping |
1053 | */ | 1100 | */ |
1054 | static int do_mmap_private(struct vm_area_struct *vma, | 1101 | static int do_mmap_private(struct vm_area_struct *vma, |
1055 | struct vm_region *region, | 1102 | struct vm_region *region, |
1056 | unsigned long len, | 1103 | unsigned long len, |
1057 | unsigned long capabilities) | 1104 | unsigned long capabilities) |
1058 | { | 1105 | { |
1059 | struct page *pages; | 1106 | struct page *pages; |
1060 | unsigned long total, point, n, rlen; | 1107 | unsigned long total, point, n, rlen; |
1061 | void *base; | 1108 | void *base; |
1062 | int ret, order; | 1109 | int ret, order; |
1063 | 1110 | ||
1064 | /* invoke the file's mapping function so that it can keep track of | 1111 | /* invoke the file's mapping function so that it can keep track of |
1065 | * shared mappings on devices or memory | 1112 | * shared mappings on devices or memory |
1066 | * - VM_MAYSHARE will be set if it may attempt to share | 1113 | * - VM_MAYSHARE will be set if it may attempt to share |
1067 | */ | 1114 | */ |
1068 | if (capabilities & BDI_CAP_MAP_DIRECT) { | 1115 | if (capabilities & BDI_CAP_MAP_DIRECT) { |
1069 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | 1116 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
1070 | if (ret == 0) { | 1117 | if (ret == 0) { |
1071 | /* shouldn't return success if we're not sharing */ | 1118 | /* shouldn't return success if we're not sharing */ |
1072 | BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); | 1119 | BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); |
1073 | vma->vm_region->vm_top = vma->vm_region->vm_end; | 1120 | vma->vm_region->vm_top = vma->vm_region->vm_end; |
1074 | return 0; | 1121 | return 0; |
1075 | } | 1122 | } |
1076 | if (ret != -ENOSYS) | 1123 | if (ret != -ENOSYS) |
1077 | return ret; | 1124 | return ret; |
1078 | 1125 | ||
1079 | /* getting an ENOSYS error indicates that direct mmap isn't | 1126 | /* getting an ENOSYS error indicates that direct mmap isn't |
1080 | * possible (as opposed to tried but failed) so we'll try to | 1127 | * possible (as opposed to tried but failed) so we'll try to |
1081 | * make a private copy of the data and map that instead */ | 1128 | * make a private copy of the data and map that instead */ |
1082 | } | 1129 | } |
1083 | 1130 | ||
1084 | rlen = PAGE_ALIGN(len); | 1131 | rlen = PAGE_ALIGN(len); |
1085 | 1132 | ||
1086 | /* allocate some memory to hold the mapping | 1133 | /* allocate some memory to hold the mapping |
1087 | * - note that this may not return a page-aligned address if the object | 1134 | * - note that this may not return a page-aligned address if the object |
1088 | * we're allocating is smaller than a page | 1135 | * we're allocating is smaller than a page |
1089 | */ | 1136 | */ |
1090 | order = get_order(rlen); | 1137 | order = get_order(rlen); |
1091 | kdebug("alloc order %d for %lx", order, len); | 1138 | kdebug("alloc order %d for %lx", order, len); |
1092 | 1139 | ||
1093 | pages = alloc_pages(GFP_KERNEL, order); | 1140 | pages = alloc_pages(GFP_KERNEL, order); |
1094 | if (!pages) | 1141 | if (!pages) |
1095 | goto enomem; | 1142 | goto enomem; |
1096 | 1143 | ||
1097 | total = 1 << order; | 1144 | total = 1 << order; |
1098 | atomic_long_add(total, &mmap_pages_allocated); | 1145 | atomic_long_add(total, &mmap_pages_allocated); |
1099 | 1146 | ||
1100 | point = rlen >> PAGE_SHIFT; | 1147 | point = rlen >> PAGE_SHIFT; |
1101 | 1148 | ||
1102 | /* we allocated a power-of-2 sized page set, so we may want to trim off | 1149 | /* we allocated a power-of-2 sized page set, so we may want to trim off |
1103 | * the excess */ | 1150 | * the excess */ |
1104 | if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) { | 1151 | if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) { |
1105 | while (total > point) { | 1152 | while (total > point) { |
1106 | order = ilog2(total - point); | 1153 | order = ilog2(total - point); |
1107 | n = 1 << order; | 1154 | n = 1 << order; |
1108 | kdebug("shave %lu/%lu @%lu", n, total - point, total); | 1155 | kdebug("shave %lu/%lu @%lu", n, total - point, total); |
1109 | atomic_long_sub(n, &mmap_pages_allocated); | 1156 | atomic_long_sub(n, &mmap_pages_allocated); |
1110 | total -= n; | 1157 | total -= n; |
1111 | set_page_refcounted(pages + total); | 1158 | set_page_refcounted(pages + total); |
1112 | __free_pages(pages + total, order); | 1159 | __free_pages(pages + total, order); |
1113 | } | 1160 | } |
1114 | } | 1161 | } |
1115 | 1162 | ||
1116 | for (point = 1; point < total; point++) | 1163 | for (point = 1; point < total; point++) |
1117 | set_page_refcounted(&pages[point]); | 1164 | set_page_refcounted(&pages[point]); |
1118 | 1165 | ||
1119 | base = page_address(pages); | 1166 | base = page_address(pages); |
1120 | region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; | 1167 | region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY; |
1121 | region->vm_start = (unsigned long) base; | 1168 | region->vm_start = (unsigned long) base; |
1122 | region->vm_end = region->vm_start + rlen; | 1169 | region->vm_end = region->vm_start + rlen; |
1123 | region->vm_top = region->vm_start + (total << PAGE_SHIFT); | 1170 | region->vm_top = region->vm_start + (total << PAGE_SHIFT); |
1124 | 1171 | ||
1125 | vma->vm_start = region->vm_start; | 1172 | vma->vm_start = region->vm_start; |
1126 | vma->vm_end = region->vm_start + len; | 1173 | vma->vm_end = region->vm_start + len; |
1127 | 1174 | ||
1128 | if (vma->vm_file) { | 1175 | if (vma->vm_file) { |
1129 | /* read the contents of a file into the copy */ | 1176 | /* read the contents of a file into the copy */ |
1130 | mm_segment_t old_fs; | 1177 | mm_segment_t old_fs; |
1131 | loff_t fpos; | 1178 | loff_t fpos; |
1132 | 1179 | ||
1133 | fpos = vma->vm_pgoff; | 1180 | fpos = vma->vm_pgoff; |
1134 | fpos <<= PAGE_SHIFT; | 1181 | fpos <<= PAGE_SHIFT; |
1135 | 1182 | ||
1136 | old_fs = get_fs(); | 1183 | old_fs = get_fs(); |
1137 | set_fs(KERNEL_DS); | 1184 | set_fs(KERNEL_DS); |
1138 | ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos); | 1185 | ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos); |
1139 | set_fs(old_fs); | 1186 | set_fs(old_fs); |
1140 | 1187 | ||
1141 | if (ret < 0) | 1188 | if (ret < 0) |
1142 | goto error_free; | 1189 | goto error_free; |
1143 | 1190 | ||
1144 | /* clear the last little bit */ | 1191 | /* clear the last little bit */ |
1145 | if (ret < rlen) | 1192 | if (ret < rlen) |
1146 | memset(base + ret, 0, rlen - ret); | 1193 | memset(base + ret, 0, rlen - ret); |
1147 | 1194 | ||
1148 | } | 1195 | } |
1149 | 1196 | ||
1150 | return 0; | 1197 | return 0; |
1151 | 1198 | ||
1152 | error_free: | 1199 | error_free: |
1153 | free_page_series(region->vm_start, region->vm_end); | 1200 | free_page_series(region->vm_start, region->vm_end); |
1154 | region->vm_start = vma->vm_start = 0; | 1201 | region->vm_start = vma->vm_start = 0; |
1155 | region->vm_end = vma->vm_end = 0; | 1202 | region->vm_end = vma->vm_end = 0; |
1156 | region->vm_top = 0; | 1203 | region->vm_top = 0; |
1157 | return ret; | 1204 | return ret; |
1158 | 1205 | ||
1159 | enomem: | 1206 | enomem: |
1160 | printk("Allocation of length %lu from process %d (%s) failed\n", | 1207 | printk("Allocation of length %lu from process %d (%s) failed\n", |
1161 | len, current->pid, current->comm); | 1208 | len, current->pid, current->comm); |
1162 | show_free_areas(); | 1209 | show_free_areas(); |
1163 | return -ENOMEM; | 1210 | return -ENOMEM; |
1164 | } | 1211 | } |
1165 | 1212 | ||
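do_mmap_private() rounds the request up to a power-of-two page count and then, when trimming is enabled via sysctl_nr_trim_pages and the excess is large enough, gives the surplus back in the largest power-of-two chunks that still fit. A standalone sketch of just that trimming arithmetic (plain integers instead of struct page, illustration only):

#include <stdio.h>

/* integer log2 of a non-zero value (stand-in for the kernel's ilog2()) */
static unsigned int ilog2_ul(unsigned long v)
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

int main(void)
{
        unsigned long total = 1UL << 4;   /* 16 pages were allocated */
        unsigned long point = 9;          /* only 9 pages are needed */

        /* shave the excess off in power-of-two chunks, largest first */
        while (total > point) {
                unsigned int order = ilog2_ul(total - point);
                unsigned long n = 1UL << order;

                total -= n;
                printf("free %lu page(s) starting at page index %lu\n", n, total);
        }
        return 0;
}

For 16 allocated pages with 9 needed, the loop frees 4, then 2, then 1 page, mirroring how the code above walks total down to point while returning each chunk with __free_pages() at its matching order.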
1166 | /* | 1213 | /* |
1167 | * handle mapping creation for uClinux | 1214 | * handle mapping creation for uClinux |
1168 | */ | 1215 | */ |
1169 | unsigned long do_mmap_pgoff(struct file *file, | 1216 | unsigned long do_mmap_pgoff(struct file *file, |
1170 | unsigned long addr, | 1217 | unsigned long addr, |
1171 | unsigned long len, | 1218 | unsigned long len, |
1172 | unsigned long prot, | 1219 | unsigned long prot, |
1173 | unsigned long flags, | 1220 | unsigned long flags, |
1174 | unsigned long pgoff) | 1221 | unsigned long pgoff) |
1175 | { | 1222 | { |
1176 | struct vm_area_struct *vma; | 1223 | struct vm_area_struct *vma; |
1177 | struct vm_region *region; | 1224 | struct vm_region *region; |
1178 | struct rb_node *rb; | 1225 | struct rb_node *rb; |
1179 | unsigned long capabilities, vm_flags, result; | 1226 | unsigned long capabilities, vm_flags, result; |
1180 | int ret; | 1227 | int ret; |
1181 | 1228 | ||
1182 | kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); | 1229 | kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); |
1183 | 1230 | ||
1184 | /* decide whether we should attempt the mapping, and if so what sort of | 1231 | /* decide whether we should attempt the mapping, and if so what sort of |
1185 | * mapping */ | 1232 | * mapping */ |
1186 | ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, | 1233 | ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, |
1187 | &capabilities); | 1234 | &capabilities); |
1188 | if (ret < 0) { | 1235 | if (ret < 0) { |
1189 | kleave(" = %d [val]", ret); | 1236 | kleave(" = %d [val]", ret); |
1190 | return ret; | 1237 | return ret; |
1191 | } | 1238 | } |
1192 | 1239 | ||
1193 | /* we ignore the address hint */ | 1240 | /* we ignore the address hint */ |
1194 | addr = 0; | 1241 | addr = 0; |
1195 | 1242 | ||
1196 | /* we've determined that we can make the mapping, now translate what we | 1243 | /* we've determined that we can make the mapping, now translate what we |
1197 | * now know into VMA flags */ | 1244 | * now know into VMA flags */ |
1198 | vm_flags = determine_vm_flags(file, prot, flags, capabilities); | 1245 | vm_flags = determine_vm_flags(file, prot, flags, capabilities); |
1199 | 1246 | ||
1200 | /* we're going to need to record the mapping */ | 1247 | /* we're going to need to record the mapping */ |
1201 | region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); | 1248 | region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); |
1202 | if (!region) | 1249 | if (!region) |
1203 | goto error_getting_region; | 1250 | goto error_getting_region; |
1204 | 1251 | ||
1205 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); | 1252 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); |
1206 | if (!vma) | 1253 | if (!vma) |
1207 | goto error_getting_vma; | 1254 | goto error_getting_vma; |
1208 | 1255 | ||
1209 | region->vm_usage = 1; | 1256 | region->vm_usage = 1; |
1210 | region->vm_flags = vm_flags; | 1257 | region->vm_flags = vm_flags; |
1211 | region->vm_pgoff = pgoff; | 1258 | region->vm_pgoff = pgoff; |
1212 | 1259 | ||
1213 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 1260 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
1214 | vma->vm_flags = vm_flags; | 1261 | vma->vm_flags = vm_flags; |
1215 | vma->vm_pgoff = pgoff; | 1262 | vma->vm_pgoff = pgoff; |
1216 | 1263 | ||
1217 | if (file) { | 1264 | if (file) { |
1218 | region->vm_file = file; | 1265 | region->vm_file = file; |
1219 | get_file(file); | 1266 | get_file(file); |
1220 | vma->vm_file = file; | 1267 | vma->vm_file = file; |
1221 | get_file(file); | 1268 | get_file(file); |
1222 | if (vm_flags & VM_EXECUTABLE) { | 1269 | if (vm_flags & VM_EXECUTABLE) { |
1223 | added_exe_file_vma(current->mm); | 1270 | added_exe_file_vma(current->mm); |
1224 | vma->vm_mm = current->mm; | 1271 | vma->vm_mm = current->mm; |
1225 | } | 1272 | } |
1226 | } | 1273 | } |
1227 | 1274 | ||
1228 | down_write(&nommu_region_sem); | 1275 | down_write(&nommu_region_sem); |
1229 | 1276 | ||
1230 | /* if we want to share, we need to check for regions created by other | 1277 | /* if we want to share, we need to check for regions created by other |
1231 | * mmap() calls that overlap with our proposed mapping | 1278 | * mmap() calls that overlap with our proposed mapping |
1232 | * - we can only share with a superset match on most regular files | 1279 | * - we can only share with a superset match on most regular files |
1233 | * - shared mappings on character devices and memory backed files are | 1280 | * - shared mappings on character devices and memory backed files are |
1234 | * permitted to overlap inexactly as far as we are concerned for in | 1281 | * permitted to overlap inexactly as far as we are concerned for in |
1235 | * these cases, sharing is handled in the driver or filesystem rather | 1282 | * these cases, sharing is handled in the driver or filesystem rather |
1236 | * than here | 1283 | * than here |
1237 | */ | 1284 | */ |
1238 | if (vm_flags & VM_MAYSHARE) { | 1285 | if (vm_flags & VM_MAYSHARE) { |
1239 | struct vm_region *pregion; | 1286 | struct vm_region *pregion; |
1240 | unsigned long pglen, rpglen, pgend, rpgend, start; | 1287 | unsigned long pglen, rpglen, pgend, rpgend, start; |
1241 | 1288 | ||
1242 | pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1289 | pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1243 | pgend = pgoff + pglen; | 1290 | pgend = pgoff + pglen; |
1244 | 1291 | ||
1245 | for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { | 1292 | for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { |
1246 | pregion = rb_entry(rb, struct vm_region, vm_rb); | 1293 | pregion = rb_entry(rb, struct vm_region, vm_rb); |
1247 | 1294 | ||
1248 | if (!(pregion->vm_flags & VM_MAYSHARE)) | 1295 | if (!(pregion->vm_flags & VM_MAYSHARE)) |
1249 | continue; | 1296 | continue; |
1250 | 1297 | ||
1251 | /* search for overlapping mappings on the same file */ | 1298 | /* search for overlapping mappings on the same file */ |
1252 | if (pregion->vm_file->f_path.dentry->d_inode != | 1299 | if (pregion->vm_file->f_path.dentry->d_inode != |
1253 | file->f_path.dentry->d_inode) | 1300 | file->f_path.dentry->d_inode) |
1254 | continue; | 1301 | continue; |
1255 | 1302 | ||
1256 | if (pregion->vm_pgoff >= pgend) | 1303 | if (pregion->vm_pgoff >= pgend) |
1257 | continue; | 1304 | continue; |
1258 | 1305 | ||
1259 | rpglen = pregion->vm_end - pregion->vm_start; | 1306 | rpglen = pregion->vm_end - pregion->vm_start; |
1260 | rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1307 | rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1261 | rpgend = pregion->vm_pgoff + rpglen; | 1308 | rpgend = pregion->vm_pgoff + rpglen; |
1262 | if (pgoff >= rpgend) | 1309 | if (pgoff >= rpgend) |
1263 | continue; | 1310 | continue; |
1264 | 1311 | ||
1265 | /* handle inexactly overlapping matches between | 1312 | /* handle inexactly overlapping matches between |
1266 | * mappings */ | 1313 | * mappings */ |
1267 | if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && | 1314 | if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && |
1268 | !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { | 1315 | !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { |
1269 | /* new mapping is not a subset of the region */ | 1316 | /* new mapping is not a subset of the region */ |
1270 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) | 1317 | if (!(capabilities & BDI_CAP_MAP_DIRECT)) |
1271 | goto sharing_violation; | 1318 | goto sharing_violation; |
1272 | continue; | 1319 | continue; |
1273 | } | 1320 | } |
1274 | 1321 | ||
1275 | /* we've found a region we can share */ | 1322 | /* we've found a region we can share */ |
1276 | pregion->vm_usage++; | 1323 | pregion->vm_usage++; |
1277 | vma->vm_region = pregion; | 1324 | vma->vm_region = pregion; |
1278 | start = pregion->vm_start; | 1325 | start = pregion->vm_start; |
1279 | start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; | 1326 | start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; |
1280 | vma->vm_start = start; | 1327 | vma->vm_start = start; |
1281 | vma->vm_end = start + len; | 1328 | vma->vm_end = start + len; |
1282 | 1329 | ||
1283 | if (pregion->vm_flags & VM_MAPPED_COPY) { | 1330 | if (pregion->vm_flags & VM_MAPPED_COPY) { |
1284 | kdebug("share copy"); | 1331 | kdebug("share copy"); |
1285 | vma->vm_flags |= VM_MAPPED_COPY; | 1332 | vma->vm_flags |= VM_MAPPED_COPY; |
1286 | } else { | 1333 | } else { |
1287 | kdebug("share mmap"); | 1334 | kdebug("share mmap"); |
1288 | ret = do_mmap_shared_file(vma); | 1335 | ret = do_mmap_shared_file(vma); |
1289 | if (ret < 0) { | 1336 | if (ret < 0) { |
1290 | vma->vm_region = NULL; | 1337 | vma->vm_region = NULL; |
1291 | vma->vm_start = 0; | 1338 | vma->vm_start = 0; |
1292 | vma->vm_end = 0; | 1339 | vma->vm_end = 0; |
1293 | pregion->vm_usage--; | 1340 | pregion->vm_usage--; |
1294 | pregion = NULL; | 1341 | pregion = NULL; |
1295 | goto error_just_free; | 1342 | goto error_just_free; |
1296 | } | 1343 | } |
1297 | } | 1344 | } |
1298 | fput(region->vm_file); | 1345 | fput(region->vm_file); |
1299 | kmem_cache_free(vm_region_jar, region); | 1346 | kmem_cache_free(vm_region_jar, region); |
1300 | region = pregion; | 1347 | region = pregion; |
1301 | result = start; | 1348 | result = start; |
1302 | goto share; | 1349 | goto share; |
1303 | } | 1350 | } |
1304 | 1351 | ||
1305 | /* obtain the address at which to make a shared mapping | 1352 | /* obtain the address at which to make a shared mapping |
1306 | * - this is the hook for quasi-memory character devices to | 1353 | * - this is the hook for quasi-memory character devices to |
1307 | * tell us the location of a shared mapping | 1354 | * tell us the location of a shared mapping |
1308 | */ | 1355 | */ |
1309 | if (capabilities & BDI_CAP_MAP_DIRECT) { | 1356 | if (capabilities & BDI_CAP_MAP_DIRECT) { |
1310 | addr = file->f_op->get_unmapped_area(file, addr, len, | 1357 | addr = file->f_op->get_unmapped_area(file, addr, len, |
1311 | pgoff, flags); | 1358 | pgoff, flags); |
1312 | if (IS_ERR((void *) addr)) { | 1359 | if (IS_ERR((void *) addr)) { |
1313 | ret = addr; | 1360 | ret = addr; |
1314 | if (ret != (unsigned long) -ENOSYS) | 1361 | if (ret != (unsigned long) -ENOSYS) |
1315 | goto error_just_free; | 1362 | goto error_just_free; |
1316 | 1363 | ||
1317 | /* the driver refused to tell us where to site | 1364 | /* the driver refused to tell us where to site |
1318 | * the mapping so we'll have to attempt to copy | 1365 | * the mapping so we'll have to attempt to copy |
1319 | * it */ | 1366 | * it */ |
1320 | ret = (unsigned long) -ENODEV; | 1367 | ret = (unsigned long) -ENODEV; |
1321 | if (!(capabilities & BDI_CAP_MAP_COPY)) | 1368 | if (!(capabilities & BDI_CAP_MAP_COPY)) |
1322 | goto error_just_free; | 1369 | goto error_just_free; |
1323 | 1370 | ||
1324 | capabilities &= ~BDI_CAP_MAP_DIRECT; | 1371 | capabilities &= ~BDI_CAP_MAP_DIRECT; |
1325 | } else { | 1372 | } else { |
1326 | vma->vm_start = region->vm_start = addr; | 1373 | vma->vm_start = region->vm_start = addr; |
1327 | vma->vm_end = region->vm_end = addr + len; | 1374 | vma->vm_end = region->vm_end = addr + len; |
1328 | } | 1375 | } |
1329 | } | 1376 | } |
1330 | } | 1377 | } |
1331 | 1378 | ||
1332 | vma->vm_region = region; | 1379 | vma->vm_region = region; |
1333 | 1380 | ||
1334 | /* set up the mapping | 1381 | /* set up the mapping |
1335 | * - the region is filled in if BDI_CAP_MAP_DIRECT is still set | 1382 | * - the region is filled in if BDI_CAP_MAP_DIRECT is still set |
1336 | */ | 1383 | */ |
1337 | if (file && vma->vm_flags & VM_SHARED) | 1384 | if (file && vma->vm_flags & VM_SHARED) |
1338 | ret = do_mmap_shared_file(vma); | 1385 | ret = do_mmap_shared_file(vma); |
1339 | else | 1386 | else |
1340 | ret = do_mmap_private(vma, region, len, capabilities); | 1387 | ret = do_mmap_private(vma, region, len, capabilities); |
1341 | if (ret < 0) | 1388 | if (ret < 0) |
1342 | goto error_just_free; | 1389 | goto error_just_free; |
1343 | add_nommu_region(region); | 1390 | add_nommu_region(region); |
1344 | 1391 | ||
1345 | /* clear anonymous mappings that don't ask for uninitialized data */ | 1392 | /* clear anonymous mappings that don't ask for uninitialized data */ |
1346 | if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) | 1393 | if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) |
1347 | memset((void *)region->vm_start, 0, | 1394 | memset((void *)region->vm_start, 0, |
1348 | region->vm_end - region->vm_start); | 1395 | region->vm_end - region->vm_start); |
1349 | 1396 | ||
1350 | /* okay... we have a mapping; now we have to register it */ | 1397 | /* okay... we have a mapping; now we have to register it */ |
1351 | result = vma->vm_start; | 1398 | result = vma->vm_start; |
1352 | 1399 | ||
1353 | current->mm->total_vm += len >> PAGE_SHIFT; | 1400 | current->mm->total_vm += len >> PAGE_SHIFT; |
1354 | 1401 | ||
1355 | share: | 1402 | share: |
1356 | add_vma_to_mm(current->mm, vma); | 1403 | add_vma_to_mm(current->mm, vma); |
1357 | 1404 | ||
1358 | /* we flush the region from the icache only when the first executable | 1405 | /* we flush the region from the icache only when the first executable |
1359 | * mapping of it is made */ | 1406 | * mapping of it is made */ |
1360 | if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { | 1407 | if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { |
1361 | flush_icache_range(region->vm_start, region->vm_end); | 1408 | flush_icache_range(region->vm_start, region->vm_end); |
1362 | region->vm_icache_flushed = true; | 1409 | region->vm_icache_flushed = true; |
1363 | } | 1410 | } |
1364 | 1411 | ||
1365 | up_write(&nommu_region_sem); | 1412 | up_write(&nommu_region_sem); |
1366 | 1413 | ||
1367 | kleave(" = %lx", result); | 1414 | kleave(" = %lx", result); |
1368 | return result; | 1415 | return result; |
1369 | 1416 | ||
1370 | error_just_free: | 1417 | error_just_free: |
1371 | up_write(&nommu_region_sem); | 1418 | up_write(&nommu_region_sem); |
1372 | error: | 1419 | error: |
1373 | if (region->vm_file) | 1420 | if (region->vm_file) |
1374 | fput(region->vm_file); | 1421 | fput(region->vm_file); |
1375 | kmem_cache_free(vm_region_jar, region); | 1422 | kmem_cache_free(vm_region_jar, region); |
1376 | if (vma->vm_file) | 1423 | if (vma->vm_file) |
1377 | fput(vma->vm_file); | 1424 | fput(vma->vm_file); |
1378 | if (vma->vm_flags & VM_EXECUTABLE) | 1425 | if (vma->vm_flags & VM_EXECUTABLE) |
1379 | removed_exe_file_vma(vma->vm_mm); | 1426 | removed_exe_file_vma(vma->vm_mm); |
1380 | kmem_cache_free(vm_area_cachep, vma); | 1427 | kmem_cache_free(vm_area_cachep, vma); |
1381 | kleave(" = %d", ret); | 1428 | kleave(" = %d", ret); |
1382 | return ret; | 1429 | return ret; |
1383 | 1430 | ||
1384 | sharing_violation: | 1431 | sharing_violation: |
1385 | up_write(&nommu_region_sem); | 1432 | up_write(&nommu_region_sem); |
1386 | printk(KERN_WARNING "Attempt to share mismatched mappings\n"); | 1433 | printk(KERN_WARNING "Attempt to share mismatched mappings\n"); |
1387 | ret = -EINVAL; | 1434 | ret = -EINVAL; |
1388 | goto error; | 1435 | goto error; |
1389 | 1436 | ||
1390 | error_getting_vma: | 1437 | error_getting_vma: |
1391 | kmem_cache_free(vm_region_jar, region); | 1438 | kmem_cache_free(vm_region_jar, region); |
1392 | printk(KERN_WARNING "Allocation of vma for %lu byte allocation" | 1439 | printk(KERN_WARNING "Allocation of vma for %lu byte allocation" |
1393 | " from process %d failed\n", | 1440 | " from process %d failed\n", |
1394 | len, current->pid); | 1441 | len, current->pid); |
1395 | show_free_areas(); | 1442 | show_free_areas(); |
1396 | return -ENOMEM; | 1443 | return -ENOMEM; |
1397 | 1444 | ||
1398 | error_getting_region: | 1445 | error_getting_region: |
1399 | printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" | 1446 | printk(KERN_WARNING "Allocation of vm region for %lu byte allocation" |
1400 | " from process %d failed\n", | 1447 | " from process %d failed\n", |
1401 | len, current->pid); | 1448 | len, current->pid); |
1402 | show_free_areas(); | 1449 | show_free_areas(); |
1403 | return -ENOMEM; | 1450 | return -ENOMEM; |
1404 | } | 1451 | } |
1405 | EXPORT_SYMBOL(do_mmap_pgoff); | 1452 | EXPORT_SYMBOL(do_mmap_pgoff); |
1406 | 1453 | ||
1407 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | 1454 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, |
1408 | unsigned long, prot, unsigned long, flags, | 1455 | unsigned long, prot, unsigned long, flags, |
1409 | unsigned long, fd, unsigned long, pgoff) | 1456 | unsigned long, fd, unsigned long, pgoff) |
1410 | { | 1457 | { |
1411 | struct file *file = NULL; | 1458 | struct file *file = NULL; |
1412 | unsigned long retval = -EBADF; | 1459 | unsigned long retval = -EBADF; |
1413 | 1460 | ||
1414 | if (!(flags & MAP_ANONYMOUS)) { | 1461 | if (!(flags & MAP_ANONYMOUS)) { |
1415 | file = fget(fd); | 1462 | file = fget(fd); |
1416 | if (!file) | 1463 | if (!file) |
1417 | goto out; | 1464 | goto out; |
1418 | } | 1465 | } |
1419 | 1466 | ||
1420 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | 1467 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); |
1421 | 1468 | ||
1422 | down_write(¤t->mm->mmap_sem); | 1469 | down_write(¤t->mm->mmap_sem); |
1423 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | 1470 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); |
1424 | up_write(¤t->mm->mmap_sem); | 1471 | up_write(¤t->mm->mmap_sem); |
1425 | 1472 | ||
1426 | if (file) | 1473 | if (file) |
1427 | fput(file); | 1474 | fput(file); |
1428 | out: | 1475 | out: |
1429 | return retval; | 1476 | return retval; |
1430 | } | 1477 | } |
1431 | 1478 | ||
1432 | #ifdef __ARCH_WANT_SYS_OLD_MMAP | 1479 | #ifdef __ARCH_WANT_SYS_OLD_MMAP |
1433 | struct mmap_arg_struct { | 1480 | struct mmap_arg_struct { |
1434 | unsigned long addr; | 1481 | unsigned long addr; |
1435 | unsigned long len; | 1482 | unsigned long len; |
1436 | unsigned long prot; | 1483 | unsigned long prot; |
1437 | unsigned long flags; | 1484 | unsigned long flags; |
1438 | unsigned long fd; | 1485 | unsigned long fd; |
1439 | unsigned long offset; | 1486 | unsigned long offset; |
1440 | }; | 1487 | }; |
1441 | 1488 | ||
1442 | SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) | 1489 | SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) |
1443 | { | 1490 | { |
1444 | struct mmap_arg_struct a; | 1491 | struct mmap_arg_struct a; |
1445 | 1492 | ||
1446 | if (copy_from_user(&a, arg, sizeof(a))) | 1493 | if (copy_from_user(&a, arg, sizeof(a))) |
1447 | return -EFAULT; | 1494 | return -EFAULT; |
1448 | if (a.offset & ~PAGE_MASK) | 1495 | if (a.offset & ~PAGE_MASK) |
1449 | return -EINVAL; | 1496 | return -EINVAL; |
1450 | 1497 | ||
1451 | return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, | 1498 | return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, |
1452 | a.offset >> PAGE_SHIFT); | 1499 | a.offset >> PAGE_SHIFT); |
1453 | } | 1500 | } |
1454 | #endif /* __ARCH_WANT_SYS_OLD_MMAP */ | 1501 | #endif /* __ARCH_WANT_SYS_OLD_MMAP */ |
1455 | 1502 | ||
1456 | /* | 1503 | /* |
1457 | * split a vma into two pieces at address 'addr'; a new vma is allocated either | 1504 | * split a vma into two pieces at address 'addr'; a new vma is allocated either |
1458 | * for the first part or the tail. | 1505 | * for the first part or the tail. |
1459 | */ | 1506 | */ |
1460 | int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, | 1507 | int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, |
1461 | unsigned long addr, int new_below) | 1508 | unsigned long addr, int new_below) |
1462 | { | 1509 | { |
1463 | struct vm_area_struct *new; | 1510 | struct vm_area_struct *new; |
1464 | struct vm_region *region; | 1511 | struct vm_region *region; |
1465 | unsigned long npages; | 1512 | unsigned long npages; |
1466 | 1513 | ||
1467 | kenter(""); | 1514 | kenter(""); |
1468 | 1515 | ||
1469 | /* we're only permitted to split anonymous regions (these should have | 1516 | /* we're only permitted to split anonymous regions (these should have |
1470 | * only a single usage on the region) */ | 1517 | * only a single usage on the region) */ |
1471 | if (vma->vm_file) | 1518 | if (vma->vm_file) |
1472 | return -ENOMEM; | 1519 | return -ENOMEM; |
1473 | 1520 | ||
1474 | if (mm->map_count >= sysctl_max_map_count) | 1521 | if (mm->map_count >= sysctl_max_map_count) |
1475 | return -ENOMEM; | 1522 | return -ENOMEM; |
1476 | 1523 | ||
1477 | region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); | 1524 | region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); |
1478 | if (!region) | 1525 | if (!region) |
1479 | return -ENOMEM; | 1526 | return -ENOMEM; |
1480 | 1527 | ||
1481 | new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); | 1528 | new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); |
1482 | if (!new) { | 1529 | if (!new) { |
1483 | kmem_cache_free(vm_region_jar, region); | 1530 | kmem_cache_free(vm_region_jar, region); |
1484 | return -ENOMEM; | 1531 | return -ENOMEM; |
1485 | } | 1532 | } |
1486 | 1533 | ||
1487 | /* most fields are the same, copy all, and then fixup */ | 1534 | /* most fields are the same, copy all, and then fixup */ |
1488 | *new = *vma; | 1535 | *new = *vma; |
1489 | *region = *vma->vm_region; | 1536 | *region = *vma->vm_region; |
1490 | new->vm_region = region; | 1537 | new->vm_region = region; |
1491 | 1538 | ||
1492 | npages = (addr - vma->vm_start) >> PAGE_SHIFT; | 1539 | npages = (addr - vma->vm_start) >> PAGE_SHIFT; |
1493 | 1540 | ||
1494 | if (new_below) { | 1541 | if (new_below) { |
1495 | region->vm_top = region->vm_end = new->vm_end = addr; | 1542 | region->vm_top = region->vm_end = new->vm_end = addr; |
1496 | } else { | 1543 | } else { |
1497 | region->vm_start = new->vm_start = addr; | 1544 | region->vm_start = new->vm_start = addr; |
1498 | region->vm_pgoff = new->vm_pgoff += npages; | 1545 | region->vm_pgoff = new->vm_pgoff += npages; |
1499 | } | 1546 | } |
1500 | 1547 | ||
1501 | if (new->vm_ops && new->vm_ops->open) | 1548 | if (new->vm_ops && new->vm_ops->open) |
1502 | new->vm_ops->open(new); | 1549 | new->vm_ops->open(new); |
1503 | 1550 | ||
1504 | delete_vma_from_mm(vma); | 1551 | delete_vma_from_mm(vma); |
1505 | down_write(&nommu_region_sem); | 1552 | down_write(&nommu_region_sem); |
1506 | delete_nommu_region(vma->vm_region); | 1553 | delete_nommu_region(vma->vm_region); |
1507 | if (new_below) { | 1554 | if (new_below) { |
1508 | vma->vm_region->vm_start = vma->vm_start = addr; | 1555 | vma->vm_region->vm_start = vma->vm_start = addr; |
1509 | vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; | 1556 | vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; |
1510 | } else { | 1557 | } else { |
1511 | vma->vm_region->vm_end = vma->vm_end = addr; | 1558 | vma->vm_region->vm_end = vma->vm_end = addr; |
1512 | vma->vm_region->vm_top = addr; | 1559 | vma->vm_region->vm_top = addr; |
1513 | } | 1560 | } |
1514 | add_nommu_region(vma->vm_region); | 1561 | add_nommu_region(vma->vm_region); |
1515 | add_nommu_region(new->vm_region); | 1562 | add_nommu_region(new->vm_region); |
1516 | up_write(&nommu_region_sem); | 1563 | up_write(&nommu_region_sem); |
1517 | add_vma_to_mm(mm, vma); | 1564 | add_vma_to_mm(mm, vma); |
1518 | add_vma_to_mm(mm, new); | 1565 | add_vma_to_mm(mm, new); |
1519 | return 0; | 1566 | return 0; |
1520 | } | 1567 | } |
1521 | 1568 | ||
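The bookkeeping in split_vma() reduces to interval arithmetic: the VMA [start, end) becomes [start, addr) and [addr, end), and whichever piece begins at addr has its page offset advanced by (addr - start) >> PAGE_SHIFT. A small standalone illustration with plain integers (assumed 4 KiB pages, example values only):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long start = 0x10000, end = 0x18000, pgoff = 0x100;
        unsigned long addr = 0x14000;   /* page-aligned split point */
        unsigned long npages = (addr - start) >> PAGE_SHIFT;

        /* lower piece keeps the original page offset */
        printf("low : [%#lx, %#lx) pgoff=%#lx\n", start, addr, pgoff);
        /* upper piece starts at addr; its offset advances by npages */
        printf("high: [%#lx, %#lx) pgoff=%#lx\n", addr, end, pgoff + npages);
        return 0;
}

The kernel code additionally has to re-register both the VMAs and their backing regions in the rb-trees under nommu_region_sem, which is why the function deletes and re-adds each object around the boundary update.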
1522 | /* | 1569 | /* |
1523 | * shrink a VMA by removing the specified chunk from either the beginning or | 1570 | * shrink a VMA by removing the specified chunk from either the beginning or |
1524 | * the end | 1571 | * the end |
1525 | */ | 1572 | */ |
1526 | static int shrink_vma(struct mm_struct *mm, | 1573 | static int shrink_vma(struct mm_struct *mm, |
1527 | struct vm_area_struct *vma, | 1574 | struct vm_area_struct *vma, |
1528 | unsigned long from, unsigned long to) | 1575 | unsigned long from, unsigned long to) |
1529 | { | 1576 | { |
1530 | struct vm_region *region; | 1577 | struct vm_region *region; |
1531 | 1578 | ||
1532 | kenter(""); | 1579 | kenter(""); |
1533 | 1580 | ||
1534 | /* adjust the VMA's pointers, which may reposition it in the MM's tree | 1581 | /* adjust the VMA's pointers, which may reposition it in the MM's tree |
1535 | * and list */ | 1582 | * and list */ |
1536 | delete_vma_from_mm(vma); | 1583 | delete_vma_from_mm(vma); |
1537 | if (from > vma->vm_start) | 1584 | if (from > vma->vm_start) |
1538 | vma->vm_end = from; | 1585 | vma->vm_end = from; |
1539 | else | 1586 | else |
1540 | vma->vm_start = to; | 1587 | vma->vm_start = to; |
1541 | add_vma_to_mm(mm, vma); | 1588 | add_vma_to_mm(mm, vma); |
1542 | 1589 | ||
1543 | /* cut the backing region down to size */ | 1590 | /* cut the backing region down to size */ |
1544 | region = vma->vm_region; | 1591 | region = vma->vm_region; |
1545 | BUG_ON(region->vm_usage != 1); | 1592 | BUG_ON(region->vm_usage != 1); |
1546 | 1593 | ||
1547 | down_write(&nommu_region_sem); | 1594 | down_write(&nommu_region_sem); |
1548 | delete_nommu_region(region); | 1595 | delete_nommu_region(region); |
1549 | if (from > region->vm_start) { | 1596 | if (from > region->vm_start) { |
1550 | to = region->vm_top; | 1597 | to = region->vm_top; |
1551 | region->vm_top = region->vm_end = from; | 1598 | region->vm_top = region->vm_end = from; |
1552 | } else { | 1599 | } else { |
1553 | region->vm_start = to; | 1600 | region->vm_start = to; |
1554 | } | 1601 | } |
1555 | add_nommu_region(region); | 1602 | add_nommu_region(region); |
1556 | up_write(&nommu_region_sem); | 1603 | up_write(&nommu_region_sem); |
1557 | 1604 | ||
1558 | free_page_series(from, to); | 1605 | free_page_series(from, to); |
1559 | return 0; | 1606 | return 0; |
1560 | } | 1607 | } |
1561 | 1608 | ||
1562 | /* | 1609 | /* |
1563 | * release a mapping | 1610 | * release a mapping |
1564 | * - under NOMMU conditions the chunk to be unmapped must be backed by a single | 1611 | * - under NOMMU conditions the chunk to be unmapped must be backed by a single |
1565 | * VMA, though it need not cover the whole VMA | 1612 | * VMA, though it need not cover the whole VMA |
1566 | */ | 1613 | */ |
1567 | int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) | 1614 | int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) |
1568 | { | 1615 | { |
1569 | struct vm_area_struct *vma; | 1616 | struct vm_area_struct *vma; |
1570 | struct rb_node *rb; | 1617 | struct rb_node *rb; |
1571 | unsigned long end = start + len; | 1618 | unsigned long end = start + len; |
1572 | int ret; | 1619 | int ret; |
1573 | 1620 | ||
1574 | kenter(",%lx,%zx", start, len); | 1621 | kenter(",%lx,%zx", start, len); |
1575 | 1622 | ||
1576 | if (len == 0) | 1623 | if (len == 0) |
1577 | return -EINVAL; | 1624 | return -EINVAL; |
1578 | 1625 | ||
1579 | /* find the first potentially overlapping VMA */ | 1626 | /* find the first potentially overlapping VMA */ |
1580 | vma = find_vma(mm, start); | 1627 | vma = find_vma(mm, start); |
1581 | if (!vma) { | 1628 | if (!vma) { |
1582 | static int limit = 0; | 1629 | static int limit = 0; |
1583 | if (limit < 5) { | 1630 | if (limit < 5) { |
1584 | printk(KERN_WARNING | 1631 | printk(KERN_WARNING |
1585 | "munmap of memory not mmapped by process %d" | 1632 | "munmap of memory not mmapped by process %d" |
1586 | " (%s): 0x%lx-0x%lx\n", | 1633 | " (%s): 0x%lx-0x%lx\n", |
1587 | current->pid, current->comm, | 1634 | current->pid, current->comm, |
1588 | start, start + len - 1); | 1635 | start, start + len - 1); |
1589 | limit++; | 1636 | limit++; |
1590 | } | 1637 | } |
1591 | return -EINVAL; | 1638 | return -EINVAL; |
1592 | } | 1639 | } |
1593 | 1640 | ||
1594 | /* we're allowed to split an anonymous VMA but not a file-backed one */ | 1641 | /* we're allowed to split an anonymous VMA but not a file-backed one */ |
1595 | if (vma->vm_file) { | 1642 | if (vma->vm_file) { |
1596 | do { | 1643 | do { |
1597 | if (start > vma->vm_start) { | 1644 | if (start > vma->vm_start) { |
1598 | kleave(" = -EINVAL [miss]"); | 1645 | kleave(" = -EINVAL [miss]"); |
1599 | return -EINVAL; | 1646 | return -EINVAL; |
1600 | } | 1647 | } |
1601 | if (end == vma->vm_end) | 1648 | if (end == vma->vm_end) |
1602 | goto erase_whole_vma; | 1649 | goto erase_whole_vma; |
1603 | rb = rb_next(&vma->vm_rb); | 1650 | rb = rb_next(&vma->vm_rb); |
1604 | vma = rb_entry(rb, struct vm_area_struct, vm_rb); | 1651 | vma = rb_entry(rb, struct vm_area_struct, vm_rb); |
1605 | } while (rb); | 1652 | } while (rb); |
1606 | kleave(" = -EINVAL [split file]"); | 1653 | kleave(" = -EINVAL [split file]"); |
1607 | return -EINVAL; | 1654 | return -EINVAL; |
1608 | } else { | 1655 | } else { |
1609 | /* the chunk must be a subset of the VMA found */ | 1656 | /* the chunk must be a subset of the VMA found */ |
1610 | if (start == vma->vm_start && end == vma->vm_end) | 1657 | if (start == vma->vm_start && end == vma->vm_end) |
1611 | goto erase_whole_vma; | 1658 | goto erase_whole_vma; |
1612 | if (start < vma->vm_start || end > vma->vm_end) { | 1659 | if (start < vma->vm_start || end > vma->vm_end) { |
1613 | kleave(" = -EINVAL [superset]"); | 1660 | kleave(" = -EINVAL [superset]"); |
1614 | return -EINVAL; | 1661 | return -EINVAL; |
1615 | } | 1662 | } |
1616 | if (start & ~PAGE_MASK) { | 1663 | if (start & ~PAGE_MASK) { |
1617 | kleave(" = -EINVAL [unaligned start]"); | 1664 | kleave(" = -EINVAL [unaligned start]"); |
1618 | return -EINVAL; | 1665 | return -EINVAL; |
1619 | } | 1666 | } |
1620 | if (end != vma->vm_end && end & ~PAGE_MASK) { | 1667 | if (end != vma->vm_end && end & ~PAGE_MASK) { |
1621 | kleave(" = -EINVAL [unaligned split]"); | 1668 | kleave(" = -EINVAL [unaligned split]"); |
1622 | return -EINVAL; | 1669 | return -EINVAL; |
1623 | } | 1670 | } |
1624 | if (start != vma->vm_start && end != vma->vm_end) { | 1671 | if (start != vma->vm_start && end != vma->vm_end) { |
1625 | ret = split_vma(mm, vma, start, 1); | 1672 | ret = split_vma(mm, vma, start, 1); |
1626 | if (ret < 0) { | 1673 | if (ret < 0) { |
1627 | kleave(" = %d [split]", ret); | 1674 | kleave(" = %d [split]", ret); |
1628 | return ret; | 1675 | return ret; |
1629 | } | 1676 | } |
1630 | } | 1677 | } |
1631 | return shrink_vma(mm, vma, start, end); | 1678 | return shrink_vma(mm, vma, start, end); |
1632 | } | 1679 | } |
1633 | 1680 | ||
1634 | erase_whole_vma: | 1681 | erase_whole_vma: |
1635 | delete_vma_from_mm(vma); | 1682 | delete_vma_from_mm(vma); |
1636 | delete_vma(mm, vma); | 1683 | delete_vma(mm, vma); |
1637 | kleave(" = 0"); | 1684 | kleave(" = 0"); |
1638 | return 0; | 1685 | return 0; |
1639 | } | 1686 | } |
1640 | EXPORT_SYMBOL(do_munmap); | 1687 | EXPORT_SYMBOL(do_munmap); |
1641 | 1688 | ||
1642 | SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) | 1689 | SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) |
1643 | { | 1690 | { |
1644 | int ret; | 1691 | int ret; |
1645 | struct mm_struct *mm = current->mm; | 1692 | struct mm_struct *mm = current->mm; |
1646 | 1693 | ||
1647 | down_write(&mm->mmap_sem); | 1694 | down_write(&mm->mmap_sem); |
1648 | ret = do_munmap(mm, addr, len); | 1695 | ret = do_munmap(mm, addr, len); |
1649 | up_write(&mm->mmap_sem); | 1696 | up_write(&mm->mmap_sem); |
1650 | return ret; | 1697 | return ret; |
1651 | } | 1698 | } |
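
Viewed from user space, the rule do_munmap() enforces above is simply that an munmap() must either drop a whole mapping or a page-aligned piece of an anonymous one. A minimal user-space sketch of that rule (illustrative only, not part of this commit):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            size_t page = (size_t)sysconf(_SC_PAGESIZE);
            size_t len = 4 * page;

            /* anonymous private mapping - a single VMA/region on NOMMU */
            char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }

            /* dropping a page-aligned tail of an anonymous VMA is allowed:
             * do_munmap() shrinks the VMA rather than refusing */
            if (munmap(p + 2 * page, 2 * page) == -1)
                    perror("munmap tail");

            /* releasing the remaining whole VMA always works */
            if (munmap(p, 2 * page) == -1)
                    perror("munmap rest");
            return 0;
    }
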
1652 | 1699 | ||
1653 | /* | 1700 | /* |
1654 | * release all the mappings made in a process's VM space | 1701 | * release all the mappings made in a process's VM space |
1655 | */ | 1702 | */ |
1656 | void exit_mmap(struct mm_struct *mm) | 1703 | void exit_mmap(struct mm_struct *mm) |
1657 | { | 1704 | { |
1658 | struct vm_area_struct *vma; | 1705 | struct vm_area_struct *vma; |
1659 | 1706 | ||
1660 | if (!mm) | 1707 | if (!mm) |
1661 | return; | 1708 | return; |
1662 | 1709 | ||
1663 | kenter(""); | 1710 | kenter(""); |
1664 | 1711 | ||
1665 | mm->total_vm = 0; | 1712 | mm->total_vm = 0; |
1666 | 1713 | ||
1667 | while ((vma = mm->mmap)) { | 1714 | while ((vma = mm->mmap)) { |
1668 | mm->mmap = vma->vm_next; | 1715 | mm->mmap = vma->vm_next; |
1669 | delete_vma_from_mm(vma); | 1716 | delete_vma_from_mm(vma); |
1670 | delete_vma(mm, vma); | 1717 | delete_vma(mm, vma); |
1671 | } | 1718 | } |
1672 | 1719 | ||
1673 | kleave(""); | 1720 | kleave(""); |
1674 | } | 1721 | } |
1675 | 1722 | ||
1676 | unsigned long do_brk(unsigned long addr, unsigned long len) | 1723 | unsigned long do_brk(unsigned long addr, unsigned long len) |
1677 | { | 1724 | { |
1678 | return -ENOMEM; | 1725 | return -ENOMEM; |
1679 | } | 1726 | } |
1680 | 1727 | ||
1681 | /* | 1728 | /* |
1682 | * expand (or shrink) an existing mapping, potentially moving it at the same | 1729 | * expand (or shrink) an existing mapping, potentially moving it at the same |
1683 | * time (controlled by the MREMAP_MAYMOVE flag and available VM space) | 1730 | * time (controlled by the MREMAP_MAYMOVE flag and available VM space) |
1684 | * | 1731 | * |
1685 | * under NOMMU conditions, we only permit changing a mapping's size, and only | 1732 | * under NOMMU conditions, we only permit changing a mapping's size, and only |
1686 | * as long as it stays within the region allocated by do_mmap_private() and the | 1733 | * as long as it stays within the region allocated by do_mmap_private() and the |
1687 | * block is not shareable | 1734 | * block is not shareable |
1688 | * | 1735 | * |
1689 | * MREMAP_FIXED is not supported under NOMMU conditions | 1736 | * MREMAP_FIXED is not supported under NOMMU conditions |
1690 | */ | 1737 | */ |
1691 | unsigned long do_mremap(unsigned long addr, | 1738 | unsigned long do_mremap(unsigned long addr, |
1692 | unsigned long old_len, unsigned long new_len, | 1739 | unsigned long old_len, unsigned long new_len, |
1693 | unsigned long flags, unsigned long new_addr) | 1740 | unsigned long flags, unsigned long new_addr) |
1694 | { | 1741 | { |
1695 | struct vm_area_struct *vma; | 1742 | struct vm_area_struct *vma; |
1696 | 1743 | ||
1697 | /* insanity checks first */ | 1744 | /* insanity checks first */ |
1698 | if (old_len == 0 || new_len == 0) | 1745 | if (old_len == 0 || new_len == 0) |
1699 | return (unsigned long) -EINVAL; | 1746 | return (unsigned long) -EINVAL; |
1700 | 1747 | ||
1701 | if (addr & ~PAGE_MASK) | 1748 | if (addr & ~PAGE_MASK) |
1702 | return -EINVAL; | 1749 | return -EINVAL; |
1703 | 1750 | ||
1704 | if (flags & MREMAP_FIXED && new_addr != addr) | 1751 | if (flags & MREMAP_FIXED && new_addr != addr) |
1705 | return (unsigned long) -EINVAL; | 1752 | return (unsigned long) -EINVAL; |
1706 | 1753 | ||
1707 | vma = find_vma_exact(current->mm, addr, old_len); | 1754 | vma = find_vma_exact(current->mm, addr, old_len); |
1708 | if (!vma) | 1755 | if (!vma) |
1709 | return (unsigned long) -EINVAL; | 1756 | return (unsigned long) -EINVAL; |
1710 | 1757 | ||
1711 | if (vma->vm_end != vma->vm_start + old_len) | 1758 | if (vma->vm_end != vma->vm_start + old_len) |
1712 | return (unsigned long) -EFAULT; | 1759 | return (unsigned long) -EFAULT; |
1713 | 1760 | ||
1714 | if (vma->vm_flags & VM_MAYSHARE) | 1761 | if (vma->vm_flags & VM_MAYSHARE) |
1715 | return (unsigned long) -EPERM; | 1762 | return (unsigned long) -EPERM; |
1716 | 1763 | ||
1717 | if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) | 1764 | if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) |
1718 | return (unsigned long) -ENOMEM; | 1765 | return (unsigned long) -ENOMEM; |
1719 | 1766 | ||
1720 | /* all checks complete - do it */ | 1767 | /* all checks complete - do it */ |
1721 | vma->vm_end = vma->vm_start + new_len; | 1768 | vma->vm_end = vma->vm_start + new_len; |
1722 | return vma->vm_start; | 1769 | return vma->vm_start; |
1723 | } | 1770 | } |
1724 | EXPORT_SYMBOL(do_mremap); | 1771 | EXPORT_SYMBOL(do_mremap); |
1725 | 1772 | ||
1726 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, | 1773 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, |
1727 | unsigned long, new_len, unsigned long, flags, | 1774 | unsigned long, new_len, unsigned long, flags, |
1728 | unsigned long, new_addr) | 1775 | unsigned long, new_addr) |
1729 | { | 1776 | { |
1730 | unsigned long ret; | 1777 | unsigned long ret; |
1731 | 1778 | ||
1732 | down_write(&current->mm->mmap_sem); | 1779 | down_write(&current->mm->mmap_sem); |
1733 | ret = do_mremap(addr, old_len, new_len, flags, new_addr); | 1780 | ret = do_mremap(addr, old_len, new_len, flags, new_addr); |
1734 | up_write(&current->mm->mmap_sem); | 1781 | up_write(&current->mm->mmap_sem); |
1735 | return ret; | 1782 | return ret; |
1736 | } | 1783 | } |
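
Because do_mremap() above only resizes in place, the mremap() usage that stays valid on NOMMU is a flag-less in-place shrink: growth is refused once new_len exceeds the region originally allocated, and MREMAP_FIXED to a different address is rejected. A hedged user-space sketch (illustration only):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            size_t page = (size_t)sysconf(_SC_PAGESIZE);
            void *p = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED)
                    return 1;

            /* in-place shrink: no flags, new_len <= the original region */
            void *q = mremap(p, 4 * page, 2 * page, 0);
            if (q == MAP_FAILED) {
                    perror("mremap");
                    q = p;
            }
            munmap(q, 2 * page);
            return 0;
    }
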
1737 | 1784 | ||
1738 | struct page *follow_page(struct vm_area_struct *vma, unsigned long address, | 1785 | struct page *follow_page(struct vm_area_struct *vma, unsigned long address, |
1739 | unsigned int foll_flags) | 1786 | unsigned int foll_flags) |
1740 | { | 1787 | { |
1741 | return NULL; | 1788 | return NULL; |
1742 | } | 1789 | } |
1743 | 1790 | ||
1744 | int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, | 1791 | int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, |
1745 | unsigned long to, unsigned long size, pgprot_t prot) | 1792 | unsigned long to, unsigned long size, pgprot_t prot) |
1746 | { | 1793 | { |
1747 | vma->vm_start = vma->vm_pgoff << PAGE_SHIFT; | 1794 | vma->vm_start = vma->vm_pgoff << PAGE_SHIFT; |
1748 | return 0; | 1795 | return 0; |
1749 | } | 1796 | } |
1750 | EXPORT_SYMBOL(remap_pfn_range); | 1797 | EXPORT_SYMBOL(remap_pfn_range); |
1751 | 1798 | ||
1752 | int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, | 1799 | int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, |
1753 | unsigned long pgoff) | 1800 | unsigned long pgoff) |
1754 | { | 1801 | { |
1755 | unsigned int size = vma->vm_end - vma->vm_start; | 1802 | unsigned int size = vma->vm_end - vma->vm_start; |
1756 | 1803 | ||
1757 | if (!(vma->vm_flags & VM_USERMAP)) | 1804 | if (!(vma->vm_flags & VM_USERMAP)) |
1758 | return -EINVAL; | 1805 | return -EINVAL; |
1759 | 1806 | ||
1760 | vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); | 1807 | vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); |
1761 | vma->vm_end = vma->vm_start + size; | 1808 | vma->vm_end = vma->vm_start + size; |
1762 | 1809 | ||
1763 | return 0; | 1810 | return 0; |
1764 | } | 1811 | } |
1765 | EXPORT_SYMBOL(remap_vmalloc_range); | 1812 | EXPORT_SYMBOL(remap_vmalloc_range); |
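
remap_vmalloc_range() is the helper a driver's ->mmap handler uses to hand a vmalloc_user() buffer to user space; the VM_USERMAP test above is why the buffer has to come from vmalloc_user()/vmalloc_32_user() rather than plain vmalloc(). A hedged sketch of such a handler - demo_buf and DEMO_BUF_SIZE are made-up names, and demo_buf is assumed to have been allocated with vmalloc_user(DEMO_BUF_SIZE) at init time:

    #include <linux/fs.h>
    #include <linux/mm.h>
    #include <linux/vmalloc.h>

    #define DEMO_BUF_SIZE	(16 * PAGE_SIZE)	/* hypothetical size */

    static void *demo_buf;	/* assumed: vmalloc_user(DEMO_BUF_SIZE) */

    static int demo_mmap(struct file *file, struct vm_area_struct *vma)
    {
            if (vma->vm_end - vma->vm_start > DEMO_BUF_SIZE)
                    return -EINVAL;

            /* on NOMMU this just points the VMA at the kernel buffer;
             * on MMU kernels it inserts the vmalloc pages one by one */
            return remap_vmalloc_range(vma, demo_buf, vma->vm_pgoff);
    }
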
1766 | 1813 | ||
1767 | void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | 1814 | void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) |
1768 | { | 1815 | { |
1769 | } | 1816 | } |
1770 | 1817 | ||
1771 | unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, | 1818 | unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, |
1772 | unsigned long len, unsigned long pgoff, unsigned long flags) | 1819 | unsigned long len, unsigned long pgoff, unsigned long flags) |
1773 | { | 1820 | { |
1774 | return -ENOMEM; | 1821 | return -ENOMEM; |
1775 | } | 1822 | } |
1776 | 1823 | ||
1777 | void arch_unmap_area(struct mm_struct *mm, unsigned long addr) | 1824 | void arch_unmap_area(struct mm_struct *mm, unsigned long addr) |
1778 | { | 1825 | { |
1779 | } | 1826 | } |
1780 | 1827 | ||
1781 | void unmap_mapping_range(struct address_space *mapping, | 1828 | void unmap_mapping_range(struct address_space *mapping, |
1782 | loff_t const holebegin, loff_t const holelen, | 1829 | loff_t const holebegin, loff_t const holelen, |
1783 | int even_cows) | 1830 | int even_cows) |
1784 | { | 1831 | { |
1785 | } | 1832 | } |
1786 | EXPORT_SYMBOL(unmap_mapping_range); | 1833 | EXPORT_SYMBOL(unmap_mapping_range); |
1787 | 1834 | ||
1788 | /* | 1835 | /* |
1789 | * Check that a process has enough memory to allocate a new virtual | 1836 | * Check that a process has enough memory to allocate a new virtual |
1790 | * mapping. 0 means there is enough memory for the allocation to | 1837 | * mapping. 0 means there is enough memory for the allocation to |
1791 | * succeed and -ENOMEM implies there is not. | 1838 | * succeed and -ENOMEM implies there is not. |
1792 | * | 1839 | * |
1793 | * We currently support three overcommit policies, which are set via the | 1840 | * We currently support three overcommit policies, which are set via the |
1794 | * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting | 1841 | * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting |
1795 | * | 1842 | * |
1796 | * Strict overcommit modes added 2002 Feb 26 by Alan Cox. | 1843 | * Strict overcommit modes added 2002 Feb 26 by Alan Cox. |
1797 | * Additional code 2002 Jul 20 by Robert Love. | 1844 | * Additional code 2002 Jul 20 by Robert Love. |
1798 | * | 1845 | * |
1799 | * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. | 1846 | * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. |
1800 | * | 1847 | * |
1801 | * Note this is a helper function intended to be used by LSMs which | 1848 | * Note this is a helper function intended to be used by LSMs which |
1802 | * wish to use this logic. | 1849 | * wish to use this logic. |
1803 | */ | 1850 | */ |
1804 | int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | 1851 | int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) |
1805 | { | 1852 | { |
1806 | unsigned long free, allowed; | 1853 | unsigned long free, allowed; |
1807 | 1854 | ||
1808 | vm_acct_memory(pages); | 1855 | vm_acct_memory(pages); |
1809 | 1856 | ||
1810 | /* | 1857 | /* |
1811 | * Sometimes we want to use more memory than we have | 1858 | * Sometimes we want to use more memory than we have |
1812 | */ | 1859 | */ |
1813 | if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) | 1860 | if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) |
1814 | return 0; | 1861 | return 0; |
1815 | 1862 | ||
1816 | if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { | 1863 | if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { |
1817 | unsigned long n; | 1864 | unsigned long n; |
1818 | 1865 | ||
1819 | free = global_page_state(NR_FILE_PAGES); | 1866 | free = global_page_state(NR_FILE_PAGES); |
1820 | free += nr_swap_pages; | 1867 | free += nr_swap_pages; |
1821 | 1868 | ||
1822 | /* | 1869 | /* |
1823 | * Any slabs which are created with the | 1870 | * Any slabs which are created with the |
1824 | * SLAB_RECLAIM_ACCOUNT flag claim to have contents | 1871 | * SLAB_RECLAIM_ACCOUNT flag claim to have contents |
1825 | * which are reclaimable, under pressure. The dentry | 1872 | * which are reclaimable, under pressure. The dentry |
1826 | * cache and most inode caches should fall into this category. | 1873 | * cache and most inode caches should fall into this category. |
1827 | */ | 1874 | */ |
1828 | free += global_page_state(NR_SLAB_RECLAIMABLE); | 1875 | free += global_page_state(NR_SLAB_RECLAIMABLE); |
1829 | 1876 | ||
1830 | /* | 1877 | /* |
1831 | * Leave the last 3% for root | 1878 | * Leave the last 3% for root |
1832 | */ | 1879 | */ |
1833 | if (!cap_sys_admin) | 1880 | if (!cap_sys_admin) |
1834 | free -= free / 32; | 1881 | free -= free / 32; |
1835 | 1882 | ||
1836 | if (free > pages) | 1883 | if (free > pages) |
1837 | return 0; | 1884 | return 0; |
1838 | 1885 | ||
1839 | /* | 1886 | /* |
1840 | * nr_free_pages() is very expensive on large systems, | 1887 | * nr_free_pages() is very expensive on large systems, |
1841 | * only call if we're about to fail. | 1888 | * only call if we're about to fail. |
1842 | */ | 1889 | */ |
1843 | n = nr_free_pages(); | 1890 | n = nr_free_pages(); |
1844 | 1891 | ||
1845 | /* | 1892 | /* |
1846 | * Leave reserved pages. The pages are not for anonymous pages. | 1893 | * Leave reserved pages. The pages are not for anonymous pages. |
1847 | */ | 1894 | */ |
1848 | if (n <= totalreserve_pages) | 1895 | if (n <= totalreserve_pages) |
1849 | goto error; | 1896 | goto error; |
1850 | else | 1897 | else |
1851 | n -= totalreserve_pages; | 1898 | n -= totalreserve_pages; |
1852 | 1899 | ||
1853 | /* | 1900 | /* |
1854 | * Leave the last 3% for root | 1901 | * Leave the last 3% for root |
1855 | */ | 1902 | */ |
1856 | if (!cap_sys_admin) | 1903 | if (!cap_sys_admin) |
1857 | n -= n / 32; | 1904 | n -= n / 32; |
1858 | free += n; | 1905 | free += n; |
1859 | 1906 | ||
1860 | if (free > pages) | 1907 | if (free > pages) |
1861 | return 0; | 1908 | return 0; |
1862 | 1909 | ||
1863 | goto error; | 1910 | goto error; |
1864 | } | 1911 | } |
1865 | 1912 | ||
1866 | allowed = totalram_pages * sysctl_overcommit_ratio / 100; | 1913 | allowed = totalram_pages * sysctl_overcommit_ratio / 100; |
1867 | /* | 1914 | /* |
1868 | * Leave the last 3% for root | 1915 | * Leave the last 3% for root |
1869 | */ | 1916 | */ |
1870 | if (!cap_sys_admin) | 1917 | if (!cap_sys_admin) |
1871 | allowed -= allowed / 32; | 1918 | allowed -= allowed / 32; |
1872 | allowed += total_swap_pages; | 1919 | allowed += total_swap_pages; |
1873 | 1920 | ||
1874 | /* Don't let a single process grow too big: | 1921 | /* Don't let a single process grow too big: |
1875 | leave 3% of the size of this process for other processes */ | 1922 | leave 3% of the size of this process for other processes */ |
1876 | if (mm) | 1923 | if (mm) |
1877 | allowed -= mm->total_vm / 32; | 1924 | allowed -= mm->total_vm / 32; |
1878 | 1925 | ||
1879 | if (percpu_counter_read_positive(&vm_committed_as) < allowed) | 1926 | if (percpu_counter_read_positive(&vm_committed_as) < allowed) |
1880 | return 0; | 1927 | return 0; |
1881 | 1928 | ||
1882 | error: | 1929 | error: |
1883 | vm_unacct_memory(pages); | 1930 | vm_unacct_memory(pages); |
1884 | 1931 | ||
1885 | return -ENOMEM; | 1932 | return -ENOMEM; |
1886 | } | 1933 | } |
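
The accounting above boils down to small integer arithmetic: the "leave 3% for root" steps are literally x -= x/32 (about 3.1%), and in strict (OVERCOMMIT_NEVER) mode the commit limit is totalram * overcommit_ratio / 100 + swap. A standalone user-space model of that last formula (the function name is not a kernel API, just an illustration):

    #include <stdio.h>

    /* toy model of the strict-overcommit limit computed above */
    static unsigned long strict_limit(unsigned long totalram_pages,
                                      unsigned long total_swap_pages,
                                      int overcommit_ratio, int cap_sys_admin)
    {
            unsigned long allowed = totalram_pages * overcommit_ratio / 100;

            if (!cap_sys_admin)
                    allowed -= allowed / 32;	/* leave ~3% for root */
            return allowed + total_swap_pages;
    }

    int main(void)
    {
            /* e.g. 256 MiB of RAM in 4 KiB pages, no swap, ratio 50 */
            unsigned long limit = strict_limit(65536, 0, 50, 0);

            printf("commit limit: %lu pages (%lu MiB)\n", limit, limit >> 8);
            return 0;
    }
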
1887 | 1934 | ||
1888 | int in_gate_area_no_task(unsigned long addr) | 1935 | int in_gate_area_no_task(unsigned long addr) |
1889 | { | 1936 | { |
1890 | return 0; | 1937 | return 0; |
1891 | } | 1938 | } |
1892 | 1939 | ||
1893 | int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1940 | int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1894 | { | 1941 | { |
1895 | BUG(); | 1942 | BUG(); |
1896 | return 0; | 1943 | return 0; |
1897 | } | 1944 | } |
1898 | EXPORT_SYMBOL(filemap_fault); | 1945 | EXPORT_SYMBOL(filemap_fault); |
1899 | 1946 | ||
1900 | /* | 1947 | /* |
1901 | * Access another process' address space. | 1948 | * Access another process' address space. |
1902 | * - source/target buffer must be kernel space | 1949 | * - source/target buffer must be kernel space |
1903 | */ | 1950 | */ |
1904 | int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) | 1951 | int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) |
1905 | { | 1952 | { |
1906 | struct vm_area_struct *vma; | 1953 | struct vm_area_struct *vma; |
1907 | struct mm_struct *mm; | 1954 | struct mm_struct *mm; |
1908 | 1955 | ||
1909 | if (addr + len < addr) | 1956 | if (addr + len < addr) |
1910 | return 0; | 1957 | return 0; |
1911 | 1958 | ||
1912 | mm = get_task_mm(tsk); | 1959 | mm = get_task_mm(tsk); |
1913 | if (!mm) | 1960 | if (!mm) |
1914 | return 0; | 1961 | return 0; |
1915 | 1962 | ||
1916 | down_read(&mm->mmap_sem); | 1963 | down_read(&mm->mmap_sem); |
1917 | 1964 | ||
1918 | /* the access must start within one of the target process's mappings */ | 1965 | /* the access must start within one of the target process's mappings */ |
1919 | vma = find_vma(mm, addr); | 1966 | vma = find_vma(mm, addr); |
1920 | if (vma) { | 1967 | if (vma) { |
1921 | /* don't overrun this mapping */ | 1968 | /* don't overrun this mapping */ |
1922 | if (addr + len >= vma->vm_end) | 1969 | if (addr + len >= vma->vm_end) |
1923 | len = vma->vm_end - addr; | 1970 | len = vma->vm_end - addr; |
1924 | 1971 | ||
1925 | /* only read or write mappings where it is permitted */ | 1972 | /* only read or write mappings where it is permitted */ |
1926 | if (write && vma->vm_flags & VM_MAYWRITE) | 1973 | if (write && vma->vm_flags & VM_MAYWRITE) |
1927 | copy_to_user_page(vma, NULL, addr, | 1974 | copy_to_user_page(vma, NULL, addr, |
1928 | (void *) addr, buf, len); | 1975 | (void *) addr, buf, len); |
1929 | else if (!write && vma->vm_flags & VM_MAYREAD) | 1976 | else if (!write && vma->vm_flags & VM_MAYREAD) |
1930 | copy_from_user_page(vma, NULL, addr, | 1977 | copy_from_user_page(vma, NULL, addr, |
1931 | buf, (void *) addr, len); | 1978 | buf, (void *) addr, len); |
1932 | else | 1979 | else |
1933 | len = 0; | 1980 | len = 0; |
1934 | } else { | 1981 | } else { |
1935 | len = 0; | 1982 | len = 0; |
1936 | } | 1983 | } |
1937 | 1984 | ||
1938 | up_read(&mm->mmap_sem); | 1985 | up_read(&mm->mmap_sem); |
1939 | mmput(mm); | 1986 | mmput(mm); |
1940 | return len; | 1987 | return len; |
1941 | } | 1988 | } |
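
access_process_vm() is the backend for debugger-style access to another task, e.g. ptrace(PTRACE_PEEKDATA): the read is clamped to the VMA containing the address and gated on VM_MAYREAD/VM_MAYWRITE. A hedged user-space sketch of that call path, written for an MMU kernel for simplicity (NOMMU has no fork()):

    #include <stdio.h>
    #include <signal.h>
    #include <unistd.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    static long secret = 0x1234abcd;	/* word the parent will peek */

    int main(void)
    {
            pid_t pid = fork();

            if (pid == 0) {			/* child: allow tracing, stop */
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP);
                    _exit(0);
            }

            waitpid(pid, NULL, 0);		/* wait until the child stops */

            /* one word read from the child's address space */
            long val = ptrace(PTRACE_PEEKDATA, pid, &secret, NULL);
            printf("peeked 0x%lx\n", val);

            ptrace(PTRACE_DETACH, pid, NULL, NULL);
            return 0;
    }
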
1942 | 1989 | ||
1943 | /** | 1990 | /** |
1944 | * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode | 1991 | * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode |
1945 | * @inode: The inode to check | 1992 | * @inode: The inode to check |
1946 | * @size: The current filesize of the inode | 1993 | * @size: The current filesize of the inode |
1947 | * @newsize: The proposed filesize of the inode | 1994 | * @newsize: The proposed filesize of the inode |
1948 | * | 1995 | * |
1949 | * Check the shared mappings on an inode on behalf of a shrinking truncate to | 1996 | * Check the shared mappings on an inode on behalf of a shrinking truncate to |
1950 | * make sure that any outstanding VMAs aren't broken and then shrink the | 1997 | * make sure that any outstanding VMAs aren't broken and then shrink the |
1951 | * vm_regions that extend beyond it so that do_mmap_pgoff() doesn't | 1998 | * vm_regions that extend beyond it so that do_mmap_pgoff() doesn't |
1952 | * automatically grant mappings that are too large. | 1999 | * automatically grant mappings that are too large. |
1953 | */ | 2000 | */ |
1954 | int nommu_shrink_inode_mappings(struct inode *inode, size_t size, | 2001 | int nommu_shrink_inode_mappings(struct inode *inode, size_t size, |
1955 | size_t newsize) | 2002 | size_t newsize) |
1956 | { | 2003 | { |
1957 | struct vm_area_struct *vma; | 2004 | struct vm_area_struct *vma; |
1958 | struct prio_tree_iter iter; | 2005 | struct prio_tree_iter iter; |
1959 | struct vm_region *region; | 2006 | struct vm_region *region; |
1960 | pgoff_t low, high; | 2007 | pgoff_t low, high; |
1961 | size_t r_size, r_top; | 2008 | size_t r_size, r_top; |
1962 | 2009 | ||
1963 | low = newsize >> PAGE_SHIFT; | 2010 | low = newsize >> PAGE_SHIFT; |
1964 | high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | 2011 | high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1965 | 2012 | ||
1966 | down_write(&nommu_region_sem); | 2013 | down_write(&nommu_region_sem); |
1967 | 2014 | ||
1968 | /* search for VMAs that fall within the dead zone */ | 2015 | /* search for VMAs that fall within the dead zone */ |
1969 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, | 2016 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, |
1970 | low, high) { | 2017 | low, high) { |
1971 | /* found one - only interested if it's shared out of the page | 2018 | /* found one - only interested if it's shared out of the page |
1972 | * cache */ | 2019 | * cache */ |
1973 | if (vma->vm_flags & VM_SHARED) { | 2020 | if (vma->vm_flags & VM_SHARED) { |
1974 | up_write(&nommu_region_sem); | 2021 | up_write(&nommu_region_sem); |
1975 | return -ETXTBSY; /* not quite true, but near enough */ | 2022 | return -ETXTBSY; /* not quite true, but near enough */ |
1976 | } | 2023 | } |
1977 | } | 2024 | } |
1978 | 2025 | ||
1979 | /* reduce any regions that overlap the dead zone - if in existence, | 2026 | /* reduce any regions that overlap the dead zone - if in existence, |
1980 | * these will be pointed to by VMAs that don't overlap the dead zone | 2027 | * these will be pointed to by VMAs that don't overlap the dead zone |
1981 | * | 2028 | * |
1982 | * we don't check for any regions that start beyond the EOF as there | 2029 | * we don't check for any regions that start beyond the EOF as there |
1983 | * shouldn't be any | 2030 | * shouldn't be any |
1984 | */ | 2031 | */ |
1985 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, | 2032 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, |
1986 | 0, ULONG_MAX) { | 2033 | 0, ULONG_MAX) { |
1987 | if (!(vma->vm_flags & VM_SHARED)) | 2034 | if (!(vma->vm_flags & VM_SHARED)) |
1988 | continue; | 2035 | continue; |
1989 | 2036 | ||
1990 | region = vma->vm_region; | 2037 | region = vma->vm_region; |
1991 | r_size = region->vm_top - region->vm_start; | 2038 | r_size = region->vm_top - region->vm_start; |
1992 | r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; | 2039 | r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; |
1993 | 2040 | ||
1994 | if (r_top > newsize) { | 2041 | if (r_top > newsize) { |
1995 | region->vm_top -= r_top - newsize; | 2042 | region->vm_top -= r_top - newsize; |
1996 | if (region->vm_end > region->vm_top) | 2043 | if (region->vm_end > region->vm_top) |
1997 | region->vm_end = region->vm_top; | 2044 | region->vm_end = region->vm_top; |
1998 | } | 2045 | } |
1999 | } | 2046 | } |
2000 | 2047 | ||
2001 | up_write(&nommu_region_sem); | 2048 | up_write(&nommu_region_sem); |
2002 | return 0; | 2049 | return 0; |
2003 | } | 2050 | } |
2004 | 2051 |
mm/vmalloc.c
1 | /* | 1 | /* |
2 | * linux/mm/vmalloc.c | 2 | * linux/mm/vmalloc.c |
3 | * | 3 | * |
4 | * Copyright (C) 1993 Linus Torvalds | 4 | * Copyright (C) 1993 Linus Torvalds |
5 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | 5 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 |
6 | * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000 | 6 | * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000 |
7 | * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002 | 7 | * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002 |
8 | * Numa awareness, Christoph Lameter, SGI, June 2005 | 8 | * Numa awareness, Christoph Lameter, SGI, June 2005 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/highmem.h> | 14 | #include <linux/highmem.h> |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
19 | #include <linux/proc_fs.h> | 19 | #include <linux/proc_fs.h> |
20 | #include <linux/seq_file.h> | 20 | #include <linux/seq_file.h> |
21 | #include <linux/debugobjects.h> | 21 | #include <linux/debugobjects.h> |
22 | #include <linux/kallsyms.h> | 22 | #include <linux/kallsyms.h> |
23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
24 | #include <linux/rbtree.h> | 24 | #include <linux/rbtree.h> |
25 | #include <linux/radix-tree.h> | 25 | #include <linux/radix-tree.h> |
26 | #include <linux/rcupdate.h> | 26 | #include <linux/rcupdate.h> |
27 | #include <linux/pfn.h> | 27 | #include <linux/pfn.h> |
28 | #include <linux/kmemleak.h> | 28 | #include <linux/kmemleak.h> |
29 | #include <asm/atomic.h> | 29 | #include <asm/atomic.h> |
30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <asm/shmparam.h> | 32 | #include <asm/shmparam.h> |
33 | 33 | ||
34 | bool vmap_lazy_unmap __read_mostly = true; | 34 | bool vmap_lazy_unmap __read_mostly = true; |
35 | 35 | ||
36 | /*** Page table manipulation functions ***/ | 36 | /*** Page table manipulation functions ***/ |
37 | 37 | ||
38 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | 38 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) |
39 | { | 39 | { |
40 | pte_t *pte; | 40 | pte_t *pte; |
41 | 41 | ||
42 | pte = pte_offset_kernel(pmd, addr); | 42 | pte = pte_offset_kernel(pmd, addr); |
43 | do { | 43 | do { |
44 | pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); | 44 | pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); |
45 | WARN_ON(!pte_none(ptent) && !pte_present(ptent)); | 45 | WARN_ON(!pte_none(ptent) && !pte_present(ptent)); |
46 | } while (pte++, addr += PAGE_SIZE, addr != end); | 46 | } while (pte++, addr += PAGE_SIZE, addr != end); |
47 | } | 47 | } |
48 | 48 | ||
49 | static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) | 49 | static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) |
50 | { | 50 | { |
51 | pmd_t *pmd; | 51 | pmd_t *pmd; |
52 | unsigned long next; | 52 | unsigned long next; |
53 | 53 | ||
54 | pmd = pmd_offset(pud, addr); | 54 | pmd = pmd_offset(pud, addr); |
55 | do { | 55 | do { |
56 | next = pmd_addr_end(addr, end); | 56 | next = pmd_addr_end(addr, end); |
57 | if (pmd_none_or_clear_bad(pmd)) | 57 | if (pmd_none_or_clear_bad(pmd)) |
58 | continue; | 58 | continue; |
59 | vunmap_pte_range(pmd, addr, next); | 59 | vunmap_pte_range(pmd, addr, next); |
60 | } while (pmd++, addr = next, addr != end); | 60 | } while (pmd++, addr = next, addr != end); |
61 | } | 61 | } |
62 | 62 | ||
63 | static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) | 63 | static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) |
64 | { | 64 | { |
65 | pud_t *pud; | 65 | pud_t *pud; |
66 | unsigned long next; | 66 | unsigned long next; |
67 | 67 | ||
68 | pud = pud_offset(pgd, addr); | 68 | pud = pud_offset(pgd, addr); |
69 | do { | 69 | do { |
70 | next = pud_addr_end(addr, end); | 70 | next = pud_addr_end(addr, end); |
71 | if (pud_none_or_clear_bad(pud)) | 71 | if (pud_none_or_clear_bad(pud)) |
72 | continue; | 72 | continue; |
73 | vunmap_pmd_range(pud, addr, next); | 73 | vunmap_pmd_range(pud, addr, next); |
74 | } while (pud++, addr = next, addr != end); | 74 | } while (pud++, addr = next, addr != end); |
75 | } | 75 | } |
76 | 76 | ||
77 | static void vunmap_page_range(unsigned long addr, unsigned long end) | 77 | static void vunmap_page_range(unsigned long addr, unsigned long end) |
78 | { | 78 | { |
79 | pgd_t *pgd; | 79 | pgd_t *pgd; |
80 | unsigned long next; | 80 | unsigned long next; |
81 | 81 | ||
82 | BUG_ON(addr >= end); | 82 | BUG_ON(addr >= end); |
83 | pgd = pgd_offset_k(addr); | 83 | pgd = pgd_offset_k(addr); |
84 | do { | 84 | do { |
85 | next = pgd_addr_end(addr, end); | 85 | next = pgd_addr_end(addr, end); |
86 | if (pgd_none_or_clear_bad(pgd)) | 86 | if (pgd_none_or_clear_bad(pgd)) |
87 | continue; | 87 | continue; |
88 | vunmap_pud_range(pgd, addr, next); | 88 | vunmap_pud_range(pgd, addr, next); |
89 | } while (pgd++, addr = next, addr != end); | 89 | } while (pgd++, addr = next, addr != end); |
90 | } | 90 | } |
91 | 91 | ||
92 | static int vmap_pte_range(pmd_t *pmd, unsigned long addr, | 92 | static int vmap_pte_range(pmd_t *pmd, unsigned long addr, |
93 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) | 93 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) |
94 | { | 94 | { |
95 | pte_t *pte; | 95 | pte_t *pte; |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * nr is a running index into the array which helps higher level | 98 | * nr is a running index into the array which helps higher level |
99 | * callers keep track of where we're up to. | 99 | * callers keep track of where we're up to. |
100 | */ | 100 | */ |
101 | 101 | ||
102 | pte = pte_alloc_kernel(pmd, addr); | 102 | pte = pte_alloc_kernel(pmd, addr); |
103 | if (!pte) | 103 | if (!pte) |
104 | return -ENOMEM; | 104 | return -ENOMEM; |
105 | do { | 105 | do { |
106 | struct page *page = pages[*nr]; | 106 | struct page *page = pages[*nr]; |
107 | 107 | ||
108 | if (WARN_ON(!pte_none(*pte))) | 108 | if (WARN_ON(!pte_none(*pte))) |
109 | return -EBUSY; | 109 | return -EBUSY; |
110 | if (WARN_ON(!page)) | 110 | if (WARN_ON(!page)) |
111 | return -ENOMEM; | 111 | return -ENOMEM; |
112 | set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); | 112 | set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); |
113 | (*nr)++; | 113 | (*nr)++; |
114 | } while (pte++, addr += PAGE_SIZE, addr != end); | 114 | } while (pte++, addr += PAGE_SIZE, addr != end); |
115 | return 0; | 115 | return 0; |
116 | } | 116 | } |
117 | 117 | ||
118 | static int vmap_pmd_range(pud_t *pud, unsigned long addr, | 118 | static int vmap_pmd_range(pud_t *pud, unsigned long addr, |
119 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) | 119 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) |
120 | { | 120 | { |
121 | pmd_t *pmd; | 121 | pmd_t *pmd; |
122 | unsigned long next; | 122 | unsigned long next; |
123 | 123 | ||
124 | pmd = pmd_alloc(&init_mm, pud, addr); | 124 | pmd = pmd_alloc(&init_mm, pud, addr); |
125 | if (!pmd) | 125 | if (!pmd) |
126 | return -ENOMEM; | 126 | return -ENOMEM; |
127 | do { | 127 | do { |
128 | next = pmd_addr_end(addr, end); | 128 | next = pmd_addr_end(addr, end); |
129 | if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) | 129 | if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) |
130 | return -ENOMEM; | 130 | return -ENOMEM; |
131 | } while (pmd++, addr = next, addr != end); | 131 | } while (pmd++, addr = next, addr != end); |
132 | return 0; | 132 | return 0; |
133 | } | 133 | } |
134 | 134 | ||
135 | static int vmap_pud_range(pgd_t *pgd, unsigned long addr, | 135 | static int vmap_pud_range(pgd_t *pgd, unsigned long addr, |
136 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) | 136 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) |
137 | { | 137 | { |
138 | pud_t *pud; | 138 | pud_t *pud; |
139 | unsigned long next; | 139 | unsigned long next; |
140 | 140 | ||
141 | pud = pud_alloc(&init_mm, pgd, addr); | 141 | pud = pud_alloc(&init_mm, pgd, addr); |
142 | if (!pud) | 142 | if (!pud) |
143 | return -ENOMEM; | 143 | return -ENOMEM; |
144 | do { | 144 | do { |
145 | next = pud_addr_end(addr, end); | 145 | next = pud_addr_end(addr, end); |
146 | if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) | 146 | if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) |
147 | return -ENOMEM; | 147 | return -ENOMEM; |
148 | } while (pud++, addr = next, addr != end); | 148 | } while (pud++, addr = next, addr != end); |
149 | return 0; | 149 | return 0; |
150 | } | 150 | } |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and | 153 | * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and |
154 | * will have pfns corresponding to the "pages" array. | 154 | * will have pfns corresponding to the "pages" array. |
155 | * | 155 | * |
156 | * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] | 156 | * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] |
157 | */ | 157 | */ |
158 | static int vmap_page_range_noflush(unsigned long start, unsigned long end, | 158 | static int vmap_page_range_noflush(unsigned long start, unsigned long end, |
159 | pgprot_t prot, struct page **pages) | 159 | pgprot_t prot, struct page **pages) |
160 | { | 160 | { |
161 | pgd_t *pgd; | 161 | pgd_t *pgd; |
162 | unsigned long next; | 162 | unsigned long next; |
163 | unsigned long addr = start; | 163 | unsigned long addr = start; |
164 | int err = 0; | 164 | int err = 0; |
165 | int nr = 0; | 165 | int nr = 0; |
166 | 166 | ||
167 | BUG_ON(addr >= end); | 167 | BUG_ON(addr >= end); |
168 | pgd = pgd_offset_k(addr); | 168 | pgd = pgd_offset_k(addr); |
169 | do { | 169 | do { |
170 | next = pgd_addr_end(addr, end); | 170 | next = pgd_addr_end(addr, end); |
171 | err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); | 171 | err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); |
172 | if (err) | 172 | if (err) |
173 | return err; | 173 | return err; |
174 | } while (pgd++, addr = next, addr != end); | 174 | } while (pgd++, addr = next, addr != end); |
175 | 175 | ||
176 | return nr; | 176 | return nr; |
177 | } | 177 | } |
178 | 178 | ||
179 | static int vmap_page_range(unsigned long start, unsigned long end, | 179 | static int vmap_page_range(unsigned long start, unsigned long end, |
180 | pgprot_t prot, struct page **pages) | 180 | pgprot_t prot, struct page **pages) |
181 | { | 181 | { |
182 | int ret; | 182 | int ret; |
183 | 183 | ||
184 | ret = vmap_page_range_noflush(start, end, prot, pages); | 184 | ret = vmap_page_range_noflush(start, end, prot, pages); |
185 | flush_cache_vmap(start, end); | 185 | flush_cache_vmap(start, end); |
186 | return ret; | 186 | return ret; |
187 | } | 187 | } |
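
vmap_page_range() and the pgd/pud/pmd/pte walkers above are the machinery behind vmap() and vmalloc(): they install one struct page per PAGE_SIZE slot so a scattered set of pages appears virtually contiguous. A hedged in-kernel sketch of the public side of that operation (NPAGES is an arbitrary illustrative count):

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include <linux/string.h>
    #include <linux/vmalloc.h>

    #define NPAGES	4	/* illustrative page count */

    static void *vmap_demo(void)
    {
            struct page *pages[NPAGES];
            void *virt;
            int i;

            for (i = 0; i < NPAGES; i++) {
                    pages[i] = alloc_page(GFP_KERNEL);
                    if (!pages[i])
                            goto out_free;
            }

            /* one virtually contiguous window over four scattered pages */
            virt = vmap(pages, NPAGES, VM_MAP, PAGE_KERNEL);
            if (virt)
                    memset(virt, 0, NPAGES * PAGE_SIZE);
            return virt;	/* caller later vunmap()s and frees the pages */

    out_free:
            while (--i >= 0)
                    __free_page(pages[i]);
            return NULL;
    }
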
188 | 188 | ||
189 | int is_vmalloc_or_module_addr(const void *x) | 189 | int is_vmalloc_or_module_addr(const void *x) |
190 | { | 190 | { |
191 | /* | 191 | /* |
192 | * ARM, x86-64 and sparc64 put modules in a special place, | 192 | * ARM, x86-64 and sparc64 put modules in a special place, |
193 | * and fall back on vmalloc() if that fails. Others | 193 | * and fall back on vmalloc() if that fails. Others |
194 | * just put it in the vmalloc space. | 194 | * just put it in the vmalloc space. |
195 | */ | 195 | */ |
196 | #if defined(CONFIG_MODULES) && defined(MODULES_VADDR) | 196 | #if defined(CONFIG_MODULES) && defined(MODULES_VADDR) |
197 | unsigned long addr = (unsigned long)x; | 197 | unsigned long addr = (unsigned long)x; |
198 | if (addr >= MODULES_VADDR && addr < MODULES_END) | 198 | if (addr >= MODULES_VADDR && addr < MODULES_END) |
199 | return 1; | 199 | return 1; |
200 | #endif | 200 | #endif |
201 | return is_vmalloc_addr(x); | 201 | return is_vmalloc_addr(x); |
202 | } | 202 | } |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * Walk a vmap address to the struct page it maps. | 205 | * Walk a vmap address to the struct page it maps. |
206 | */ | 206 | */ |
207 | struct page *vmalloc_to_page(const void *vmalloc_addr) | 207 | struct page *vmalloc_to_page(const void *vmalloc_addr) |
208 | { | 208 | { |
209 | unsigned long addr = (unsigned long) vmalloc_addr; | 209 | unsigned long addr = (unsigned long) vmalloc_addr; |
210 | struct page *page = NULL; | 210 | struct page *page = NULL; |
211 | pgd_t *pgd = pgd_offset_k(addr); | 211 | pgd_t *pgd = pgd_offset_k(addr); |
212 | 212 | ||
213 | /* | 213 | /* |
214 | * XXX we might need to change this if we add VIRTUAL_BUG_ON for | 214 | * XXX we might need to change this if we add VIRTUAL_BUG_ON for |
215 | * architectures that do not vmalloc module space | 215 | * architectures that do not vmalloc module space |
216 | */ | 216 | */ |
217 | VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); | 217 | VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); |
218 | 218 | ||
219 | if (!pgd_none(*pgd)) { | 219 | if (!pgd_none(*pgd)) { |
220 | pud_t *pud = pud_offset(pgd, addr); | 220 | pud_t *pud = pud_offset(pgd, addr); |
221 | if (!pud_none(*pud)) { | 221 | if (!pud_none(*pud)) { |
222 | pmd_t *pmd = pmd_offset(pud, addr); | 222 | pmd_t *pmd = pmd_offset(pud, addr); |
223 | if (!pmd_none(*pmd)) { | 223 | if (!pmd_none(*pmd)) { |
224 | pte_t *ptep, pte; | 224 | pte_t *ptep, pte; |
225 | 225 | ||
226 | ptep = pte_offset_map(pmd, addr); | 226 | ptep = pte_offset_map(pmd, addr); |
227 | pte = *ptep; | 227 | pte = *ptep; |
228 | if (pte_present(pte)) | 228 | if (pte_present(pte)) |
229 | page = pte_page(pte); | 229 | page = pte_page(pte); |
230 | pte_unmap(ptep); | 230 | pte_unmap(ptep); |
231 | } | 231 | } |
232 | } | 232 | } |
233 | } | 233 | } |
234 | return page; | 234 | return page; |
235 | } | 235 | } |
236 | EXPORT_SYMBOL(vmalloc_to_page); | 236 | EXPORT_SYMBOL(vmalloc_to_page); |
237 | 237 | ||
238 | /* | 238 | /* |
239 | * Map a vmalloc()-space virtual address to the physical page frame number. | 239 | * Map a vmalloc()-space virtual address to the physical page frame number. |
240 | */ | 240 | */ |
241 | unsigned long vmalloc_to_pfn(const void *vmalloc_addr) | 241 | unsigned long vmalloc_to_pfn(const void *vmalloc_addr) |
242 | { | 242 | { |
243 | return page_to_pfn(vmalloc_to_page(vmalloc_addr)); | 243 | return page_to_pfn(vmalloc_to_page(vmalloc_addr)); |
244 | } | 244 | } |
245 | EXPORT_SYMBOL(vmalloc_to_pfn); | 245 | EXPORT_SYMBOL(vmalloc_to_pfn); |
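
Both helpers do a software walk of the kernel page tables, so they work on any vmalloc/module-range address without consulting the vmap_area tree. A hedged in-kernel sketch that uses them to show how a vmalloc'ed buffer is physically scattered (the function name and buffer size are made up):

    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <linux/vmalloc.h>

    static void dump_vmalloc_pfns(void)
    {
            size_t size = 4 * PAGE_SIZE;	/* illustrative size */
            char *buf = vmalloc(size);
            size_t off;

            if (!buf)
                    return;

            /* virtually contiguous, but the pfns usually are not */
            for (off = 0; off < size; off += PAGE_SIZE)
                    printk(KERN_INFO "vmalloc %p -> pfn %lu\n",
                           buf + off, vmalloc_to_pfn(buf + off));

            vfree(buf);
    }
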
246 | 246 | ||
247 | 247 | ||
248 | /*** Global kva allocator ***/ | 248 | /*** Global kva allocator ***/ |
249 | 249 | ||
250 | #define VM_LAZY_FREE 0x01 | 250 | #define VM_LAZY_FREE 0x01 |
251 | #define VM_LAZY_FREEING 0x02 | 251 | #define VM_LAZY_FREEING 0x02 |
252 | #define VM_VM_AREA 0x04 | 252 | #define VM_VM_AREA 0x04 |
253 | 253 | ||
254 | struct vmap_area { | 254 | struct vmap_area { |
255 | unsigned long va_start; | 255 | unsigned long va_start; |
256 | unsigned long va_end; | 256 | unsigned long va_end; |
257 | unsigned long flags; | 257 | unsigned long flags; |
258 | struct rb_node rb_node; /* address sorted rbtree */ | 258 | struct rb_node rb_node; /* address sorted rbtree */ |
259 | struct list_head list; /* address sorted list */ | 259 | struct list_head list; /* address sorted list */ |
260 | struct list_head purge_list; /* "lazy purge" list */ | 260 | struct list_head purge_list; /* "lazy purge" list */ |
261 | void *private; | 261 | void *private; |
262 | struct rcu_head rcu_head; | 262 | struct rcu_head rcu_head; |
263 | }; | 263 | }; |
264 | 264 | ||
265 | static DEFINE_SPINLOCK(vmap_area_lock); | 265 | static DEFINE_SPINLOCK(vmap_area_lock); |
266 | static struct rb_root vmap_area_root = RB_ROOT; | 266 | static struct rb_root vmap_area_root = RB_ROOT; |
267 | static LIST_HEAD(vmap_area_list); | 267 | static LIST_HEAD(vmap_area_list); |
268 | static unsigned long vmap_area_pcpu_hole; | 268 | static unsigned long vmap_area_pcpu_hole; |
269 | 269 | ||
270 | static struct vmap_area *__find_vmap_area(unsigned long addr) | 270 | static struct vmap_area *__find_vmap_area(unsigned long addr) |
271 | { | 271 | { |
272 | struct rb_node *n = vmap_area_root.rb_node; | 272 | struct rb_node *n = vmap_area_root.rb_node; |
273 | 273 | ||
274 | while (n) { | 274 | while (n) { |
275 | struct vmap_area *va; | 275 | struct vmap_area *va; |
276 | 276 | ||
277 | va = rb_entry(n, struct vmap_area, rb_node); | 277 | va = rb_entry(n, struct vmap_area, rb_node); |
278 | if (addr < va->va_start) | 278 | if (addr < va->va_start) |
279 | n = n->rb_left; | 279 | n = n->rb_left; |
280 | else if (addr > va->va_start) | 280 | else if (addr > va->va_start) |
281 | n = n->rb_right; | 281 | n = n->rb_right; |
282 | else | 282 | else |
283 | return va; | 283 | return va; |
284 | } | 284 | } |
285 | 285 | ||
286 | return NULL; | 286 | return NULL; |
287 | } | 287 | } |
288 | 288 | ||
289 | static void __insert_vmap_area(struct vmap_area *va) | 289 | static void __insert_vmap_area(struct vmap_area *va) |
290 | { | 290 | { |
291 | struct rb_node **p = &vmap_area_root.rb_node; | 291 | struct rb_node **p = &vmap_area_root.rb_node; |
292 | struct rb_node *parent = NULL; | 292 | struct rb_node *parent = NULL; |
293 | struct rb_node *tmp; | 293 | struct rb_node *tmp; |
294 | 294 | ||
295 | while (*p) { | 295 | while (*p) { |
296 | struct vmap_area *tmp_va; | 296 | struct vmap_area *tmp_va; |
297 | 297 | ||
298 | parent = *p; | 298 | parent = *p; |
299 | tmp_va = rb_entry(parent, struct vmap_area, rb_node); | 299 | tmp_va = rb_entry(parent, struct vmap_area, rb_node); |
300 | if (va->va_start < tmp_va->va_end) | 300 | if (va->va_start < tmp_va->va_end) |
301 | p = &(*p)->rb_left; | 301 | p = &(*p)->rb_left; |
302 | else if (va->va_end > tmp_va->va_start) | 302 | else if (va->va_end > tmp_va->va_start) |
303 | p = &(*p)->rb_right; | 303 | p = &(*p)->rb_right; |
304 | else | 304 | else |
305 | BUG(); | 305 | BUG(); |
306 | } | 306 | } |
307 | 307 | ||
308 | rb_link_node(&va->rb_node, parent, p); | 308 | rb_link_node(&va->rb_node, parent, p); |
309 | rb_insert_color(&va->rb_node, &vmap_area_root); | 309 | rb_insert_color(&va->rb_node, &vmap_area_root); |
310 | 310 | ||
311 | /* address-sort this list so it is usable like the vmlist */ | 311 | /* address-sort this list so it is usable like the vmlist */ |
312 | tmp = rb_prev(&va->rb_node); | 312 | tmp = rb_prev(&va->rb_node); |
313 | if (tmp) { | 313 | if (tmp) { |
314 | struct vmap_area *prev; | 314 | struct vmap_area *prev; |
315 | prev = rb_entry(tmp, struct vmap_area, rb_node); | 315 | prev = rb_entry(tmp, struct vmap_area, rb_node); |
316 | list_add_rcu(&va->list, &prev->list); | 316 | list_add_rcu(&va->list, &prev->list); |
317 | } else | 317 | } else |
318 | list_add_rcu(&va->list, &vmap_area_list); | 318 | list_add_rcu(&va->list, &vmap_area_list); |
319 | } | 319 | } |
320 | 320 | ||
321 | static void purge_vmap_area_lazy(void); | 321 | static void purge_vmap_area_lazy(void); |
322 | 322 | ||
323 | /* | 323 | /* |
324 | * Allocate a region of KVA of the specified size and alignment, within the | 324 | * Allocate a region of KVA of the specified size and alignment, within the |
325 | * vstart and vend. | 325 | * vstart and vend. |
326 | */ | 326 | */ |
327 | static struct vmap_area *alloc_vmap_area(unsigned long size, | 327 | static struct vmap_area *alloc_vmap_area(unsigned long size, |
328 | unsigned long align, | 328 | unsigned long align, |
329 | unsigned long vstart, unsigned long vend, | 329 | unsigned long vstart, unsigned long vend, |
330 | int node, gfp_t gfp_mask) | 330 | int node, gfp_t gfp_mask) |
331 | { | 331 | { |
332 | struct vmap_area *va; | 332 | struct vmap_area *va; |
333 | struct rb_node *n; | 333 | struct rb_node *n; |
334 | unsigned long addr; | 334 | unsigned long addr; |
335 | int purged = 0; | 335 | int purged = 0; |
336 | 336 | ||
337 | BUG_ON(!size); | 337 | BUG_ON(!size); |
338 | BUG_ON(size & ~PAGE_MASK); | 338 | BUG_ON(size & ~PAGE_MASK); |
339 | 339 | ||
340 | va = kmalloc_node(sizeof(struct vmap_area), | 340 | va = kmalloc_node(sizeof(struct vmap_area), |
341 | gfp_mask & GFP_RECLAIM_MASK, node); | 341 | gfp_mask & GFP_RECLAIM_MASK, node); |
342 | if (unlikely(!va)) | 342 | if (unlikely(!va)) |
343 | return ERR_PTR(-ENOMEM); | 343 | return ERR_PTR(-ENOMEM); |
344 | 344 | ||
345 | retry: | 345 | retry: |
346 | addr = ALIGN(vstart, align); | 346 | addr = ALIGN(vstart, align); |
347 | 347 | ||
348 | spin_lock(&vmap_area_lock); | 348 | spin_lock(&vmap_area_lock); |
349 | if (addr + size - 1 < addr) | 349 | if (addr + size - 1 < addr) |
350 | goto overflow; | 350 | goto overflow; |
351 | 351 | ||
352 | /* XXX: could have a last_hole cache */ | 352 | /* XXX: could have a last_hole cache */ |
353 | n = vmap_area_root.rb_node; | 353 | n = vmap_area_root.rb_node; |
354 | if (n) { | 354 | if (n) { |
355 | struct vmap_area *first = NULL; | 355 | struct vmap_area *first = NULL; |
356 | 356 | ||
357 | do { | 357 | do { |
358 | struct vmap_area *tmp; | 358 | struct vmap_area *tmp; |
359 | tmp = rb_entry(n, struct vmap_area, rb_node); | 359 | tmp = rb_entry(n, struct vmap_area, rb_node); |
360 | if (tmp->va_end >= addr) { | 360 | if (tmp->va_end >= addr) { |
361 | if (!first && tmp->va_start < addr + size) | 361 | if (!first && tmp->va_start < addr + size) |
362 | first = tmp; | 362 | first = tmp; |
363 | n = n->rb_left; | 363 | n = n->rb_left; |
364 | } else { | 364 | } else { |
365 | first = tmp; | 365 | first = tmp; |
366 | n = n->rb_right; | 366 | n = n->rb_right; |
367 | } | 367 | } |
368 | } while (n); | 368 | } while (n); |
369 | 369 | ||
370 | if (!first) | 370 | if (!first) |
371 | goto found; | 371 | goto found; |
372 | 372 | ||
373 | if (first->va_end < addr) { | 373 | if (first->va_end < addr) { |
374 | n = rb_next(&first->rb_node); | 374 | n = rb_next(&first->rb_node); |
375 | if (n) | 375 | if (n) |
376 | first = rb_entry(n, struct vmap_area, rb_node); | 376 | first = rb_entry(n, struct vmap_area, rb_node); |
377 | else | 377 | else |
378 | goto found; | 378 | goto found; |
379 | } | 379 | } |
380 | 380 | ||
381 | while (addr + size > first->va_start && addr + size <= vend) { | 381 | while (addr + size > first->va_start && addr + size <= vend) { |
382 | addr = ALIGN(first->va_end + PAGE_SIZE, align); | 382 | addr = ALIGN(first->va_end + PAGE_SIZE, align); |
383 | if (addr + size - 1 < addr) | 383 | if (addr + size - 1 < addr) |
384 | goto overflow; | 384 | goto overflow; |
385 | 385 | ||
386 | n = rb_next(&first->rb_node); | 386 | n = rb_next(&first->rb_node); |
387 | if (n) | 387 | if (n) |
388 | first = rb_entry(n, struct vmap_area, rb_node); | 388 | first = rb_entry(n, struct vmap_area, rb_node); |
389 | else | 389 | else |
390 | goto found; | 390 | goto found; |
391 | } | 391 | } |
392 | } | 392 | } |
393 | found: | 393 | found: |
394 | if (addr + size > vend) { | 394 | if (addr + size > vend) { |
395 | overflow: | 395 | overflow: |
396 | spin_unlock(&vmap_area_lock); | 396 | spin_unlock(&vmap_area_lock); |
397 | if (!purged) { | 397 | if (!purged) { |
398 | purge_vmap_area_lazy(); | 398 | purge_vmap_area_lazy(); |
399 | purged = 1; | 399 | purged = 1; |
400 | goto retry; | 400 | goto retry; |
401 | } | 401 | } |
402 | if (printk_ratelimit()) | 402 | if (printk_ratelimit()) |
403 | printk(KERN_WARNING | 403 | printk(KERN_WARNING |
404 | "vmap allocation for size %lu failed: " | 404 | "vmap allocation for size %lu failed: " |
405 | "use vmalloc=<size> to increase size.\n", size); | 405 | "use vmalloc=<size> to increase size.\n", size); |
406 | kfree(va); | 406 | kfree(va); |
407 | return ERR_PTR(-EBUSY); | 407 | return ERR_PTR(-EBUSY); |
408 | } | 408 | } |
409 | 409 | ||
410 | BUG_ON(addr & (align-1)); | 410 | BUG_ON(addr & (align-1)); |
411 | 411 | ||
412 | va->va_start = addr; | 412 | va->va_start = addr; |
413 | va->va_end = addr + size; | 413 | va->va_end = addr + size; |
414 | va->flags = 0; | 414 | va->flags = 0; |
415 | __insert_vmap_area(va); | 415 | __insert_vmap_area(va); |
416 | spin_unlock(&vmap_area_lock); | 416 | spin_unlock(&vmap_area_lock); |
417 | 417 | ||
418 | return va; | 418 | return va; |
419 | } | 419 | } |
420 | 420 | ||
421 | static void rcu_free_va(struct rcu_head *head) | 421 | static void rcu_free_va(struct rcu_head *head) |
422 | { | 422 | { |
423 | struct vmap_area *va = container_of(head, struct vmap_area, rcu_head); | 423 | struct vmap_area *va = container_of(head, struct vmap_area, rcu_head); |
424 | 424 | ||
425 | kfree(va); | 425 | kfree(va); |
426 | } | 426 | } |
427 | 427 | ||
428 | static void __free_vmap_area(struct vmap_area *va) | 428 | static void __free_vmap_area(struct vmap_area *va) |
429 | { | 429 | { |
430 | BUG_ON(RB_EMPTY_NODE(&va->rb_node)); | 430 | BUG_ON(RB_EMPTY_NODE(&va->rb_node)); |
431 | rb_erase(&va->rb_node, &vmap_area_root); | 431 | rb_erase(&va->rb_node, &vmap_area_root); |
432 | RB_CLEAR_NODE(&va->rb_node); | 432 | RB_CLEAR_NODE(&va->rb_node); |
433 | list_del_rcu(&va->list); | 433 | list_del_rcu(&va->list); |
434 | 434 | ||
435 | /* | 435 | /* |
436 | * Track the highest possible candidate for pcpu area | 436 | * Track the highest possible candidate for pcpu area |
437 | * allocation. Areas outside of vmalloc area can be returned | 437 | * allocation. Areas outside of vmalloc area can be returned |
438 | * here too, consider only end addresses which fall inside | 438 | * here too, consider only end addresses which fall inside |
439 | * vmalloc area proper. | 439 | * vmalloc area proper. |
440 | */ | 440 | */ |
441 | if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) | 441 | if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) |
442 | vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); | 442 | vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); |
443 | 443 | ||
444 | call_rcu(&va->rcu_head, rcu_free_va); | 444 | call_rcu(&va->rcu_head, rcu_free_va); |
445 | } | 445 | } |
446 | 446 | ||
447 | /* | 447 | /* |
448 | * Free a region of KVA allocated by alloc_vmap_area | 448 | * Free a region of KVA allocated by alloc_vmap_area |
449 | */ | 449 | */ |
450 | static void free_vmap_area(struct vmap_area *va) | 450 | static void free_vmap_area(struct vmap_area *va) |
451 | { | 451 | { |
452 | spin_lock(&vmap_area_lock); | 452 | spin_lock(&vmap_area_lock); |
453 | __free_vmap_area(va); | 453 | __free_vmap_area(va); |
454 | spin_unlock(&vmap_area_lock); | 454 | spin_unlock(&vmap_area_lock); |
455 | } | 455 | } |
456 | 456 | ||
457 | /* | 457 | /* |
458 | * Clear the pagetable entries of a given vmap_area | 458 | * Clear the pagetable entries of a given vmap_area |
459 | */ | 459 | */ |
460 | static void unmap_vmap_area(struct vmap_area *va) | 460 | static void unmap_vmap_area(struct vmap_area *va) |
461 | { | 461 | { |
462 | vunmap_page_range(va->va_start, va->va_end); | 462 | vunmap_page_range(va->va_start, va->va_end); |
463 | } | 463 | } |
464 | 464 | ||
465 | static void vmap_debug_free_range(unsigned long start, unsigned long end) | 465 | static void vmap_debug_free_range(unsigned long start, unsigned long end) |
466 | { | 466 | { |
467 | /* | 467 | /* |
468 | * Unmap page tables and force a TLB flush immediately if | 468 | * Unmap page tables and force a TLB flush immediately if |
469 | * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free | 469 | * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free |
470 | * bugs similarly to those in linear kernel virtual address | 470 | * bugs similarly to those in linear kernel virtual address |
471 | * space after a page has been freed. | 471 | * space after a page has been freed. |
472 | * | 472 | * |
473 | * All the lazy freeing logic is still retained, in order to | 473 | * All the lazy freeing logic is still retained, in order to |
474 | * minimise intrusiveness of this debugging feature. | 474 | * minimise intrusiveness of this debugging feature. |
475 | * | 475 | * |
476 | * This is going to be *slow* (unlike linear kernel virtual address | 476 | * This is going to be *slow* (unlike linear kernel virtual address |
477 | * debugging, which doesn't need a broadcast TLB flush and is | 477 | * debugging, which doesn't need a broadcast TLB flush and is |
478 | * therefore a lot faster). | 478 | * therefore a lot faster). |
479 | */ | 479 | */ |
480 | #ifdef CONFIG_DEBUG_PAGEALLOC | 480 | #ifdef CONFIG_DEBUG_PAGEALLOC |
481 | vunmap_page_range(start, end); | 481 | vunmap_page_range(start, end); |
482 | flush_tlb_kernel_range(start, end); | 482 | flush_tlb_kernel_range(start, end); |
483 | #endif | 483 | #endif |
484 | } | 484 | } |
485 | 485 | ||
486 | /* | 486 | /* |
487 | * lazy_max_pages is the maximum amount of virtual address space we gather up | 487 | * lazy_max_pages is the maximum amount of virtual address space we gather up |
488 | * before attempting to purge with a TLB flush. | 488 | * before attempting to purge with a TLB flush. |
489 | * | 489 | * |
490 | * There is a tradeoff here: a larger number will cover more kernel page tables | 490 | * There is a tradeoff here: a larger number will cover more kernel page tables |
491 | * and take slightly longer to purge, but it will linearly reduce the number of | 491 | * and take slightly longer to purge, but it will linearly reduce the number of |
492 | * global TLB flushes that must be performed. It would seem natural to scale | 492 | * global TLB flushes that must be performed. It would seem natural to scale |
493 | * this number up linearly with the number of CPUs (because vmapping activity | 493 | * this number up linearly with the number of CPUs (because vmapping activity |
494 | * could also scale linearly with the number of CPUs), however it is likely | 494 | * could also scale linearly with the number of CPUs), however it is likely |
495 | * that in practice, workloads might be constrained in other ways that mean | 495 | * that in practice, workloads might be constrained in other ways that mean |
496 | * vmap activity will not scale linearly with CPUs. Also, I want to be | 496 | * vmap activity will not scale linearly with CPUs. Also, I want to be |
497 | * conservative and not introduce a big latency on huge systems, so go with | 497 | * conservative and not introduce a big latency on huge systems, so go with |
498 | * a less aggressive log scale. It will still be an improvement over the old | 498 | * a less aggressive log scale. It will still be an improvement over the old |
499 | * code, and it will be simple to change the scale factor if we find that it | 499 | * code, and it will be simple to change the scale factor if we find that it |
500 | * becomes a problem on bigger systems. | 500 | * becomes a problem on bigger systems. |
501 | */ | 501 | */ |
502 | static unsigned long lazy_max_pages(void) | 502 | static unsigned long lazy_max_pages(void) |
503 | { | 503 | { |
504 | unsigned int log; | 504 | unsigned int log; |
505 | 505 | ||
506 | if (!vmap_lazy_unmap) | 506 | if (!vmap_lazy_unmap) |
507 | return 0; | 507 | return 0; |
508 | 508 | ||
509 | log = fls(num_online_cpus()); | 509 | log = fls(num_online_cpus()); |
510 | 510 | ||
511 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); | 511 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); |
512 | } | 512 | } |
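
Worked through, the log scale means roughly 32MB of lazily-unmapped address space per power-of-two step in CPU count: fls(1)=1, fls(4)=3, fls(64)=7, so a 64-way machine batches about 224MB of stale vmap space before forcing a global TLB flush. A tiny standalone model of the formula (assuming 4KiB pages; fls_model() mimics the kernel's fls()):

    #include <stdio.h>

    /* index of the highest set bit, as the kernel's fls() returns it */
    static unsigned int fls_model(unsigned int x)
    {
            unsigned int r = 0;

            while (x) {
                    r++;
                    x >>= 1;
            }
            return r;
    }

    int main(void)
    {
            unsigned long pages_per_32mb = 32UL * 1024 * 1024 / 4096;
            int cpus;

            for (cpus = 1; cpus <= 64; cpus *= 4)
                    printf("%2d cpus -> %lu lazy pages (%lu MB)\n", cpus,
                           fls_model(cpus) * pages_per_32mb,
                           fls_model(cpus) * 32UL);
            return 0;
    }
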
513 | 513 | ||
514 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | 514 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); |
515 | 515 | ||
516 | /* for per-CPU blocks */ | 516 | /* for per-CPU blocks */ |
517 | static void purge_fragmented_blocks_allcpus(void); | 517 | static void purge_fragmented_blocks_allcpus(void); |
518 | 518 | ||
519 | /* | 519 | /* |
520 | * called before a call to iounmap() if the caller wants the vm_area_struct | 520 | * called before a call to iounmap() if the caller wants the vm_area_struct |
521 | * freed immediately. | 521 | * freed immediately. |
522 | */ | 522 | */ |
523 | void set_iounmap_nonlazy(void) | 523 | void set_iounmap_nonlazy(void) |
524 | { | 524 | { |
525 | atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); | 525 | atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); |
526 | } | 526 | } |
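A hedged usage sketch of set_iounmap_nonlazy() (editor's illustration; the helper name, the memcpy_fromio() read and the sizes are hypothetical): it pushes the lazy counter past the threshold, so the following iounmap() purges its vmap area immediately.

static void read_then_unmap_now(resource_size_t phys, void *buf, size_t len)
{
	void __iomem *p = ioremap(phys, len);

	if (!p)
		return;
	memcpy_fromio(buf, p, len);
	set_iounmap_nonlazy();	/* lazy counter is now above lazy_max_pages() */
	iounmap(p);		/* so this unmap triggers an immediate purge */
}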
527 | 527 | ||
528 | /* | 528 | /* |
529 | * Purges all lazily-freed vmap areas. | 529 | * Purges all lazily-freed vmap areas. |
530 | * | 530 | * |
531 | * If sync is 0 then don't purge if there is already a purge in progress. | 531 | * If sync is 0 then don't purge if there is already a purge in progress. |
532 | * If force_flush is 1, then flush kernel TLBs between *start and *end even | 532 | * If force_flush is 1, then flush kernel TLBs between *start and *end even |
533 | * if we found no lazy vmap areas to unmap (callers can use this to optimise | 533 | * if we found no lazy vmap areas to unmap (callers can use this to optimise |
534 | * their own TLB flushing). | 534 | * their own TLB flushing). |
535 | * Returns with *start = min(*start, lowest purged address) | 535 | * Returns with *start = min(*start, lowest purged address) |
536 | * *end = max(*end, highest purged address) | 536 | * *end = max(*end, highest purged address) |
537 | */ | 537 | */ |
538 | static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | 538 | static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, |
539 | int sync, int force_flush) | 539 | int sync, int force_flush) |
540 | { | 540 | { |
541 | static DEFINE_SPINLOCK(purge_lock); | 541 | static DEFINE_SPINLOCK(purge_lock); |
542 | LIST_HEAD(valist); | 542 | LIST_HEAD(valist); |
543 | struct vmap_area *va; | 543 | struct vmap_area *va; |
544 | struct vmap_area *n_va; | 544 | struct vmap_area *n_va; |
545 | int nr = 0; | 545 | int nr = 0; |
546 | 546 | ||
547 | /* | 547 | /* |
548 | * If sync is 0 but force_flush is 1, we'll go sync anyway but callers | 548 | * If sync is 0 but force_flush is 1, we'll go sync anyway but callers |
549 | * should not expect such behaviour. This just simplifies locking for | 549 | * should not expect such behaviour. This just simplifies locking for |
550 | * the case that isn't actually used at the moment anyway. | 550 | * the case that isn't actually used at the moment anyway. |
551 | */ | 551 | */ |
552 | if (!sync && !force_flush) { | 552 | if (!sync && !force_flush) { |
553 | if (!spin_trylock(&purge_lock)) | 553 | if (!spin_trylock(&purge_lock)) |
554 | return; | 554 | return; |
555 | } else | 555 | } else |
556 | spin_lock(&purge_lock); | 556 | spin_lock(&purge_lock); |
557 | 557 | ||
558 | if (sync) | 558 | if (sync) |
559 | purge_fragmented_blocks_allcpus(); | 559 | purge_fragmented_blocks_allcpus(); |
560 | 560 | ||
561 | rcu_read_lock(); | 561 | rcu_read_lock(); |
562 | list_for_each_entry_rcu(va, &vmap_area_list, list) { | 562 | list_for_each_entry_rcu(va, &vmap_area_list, list) { |
563 | if (va->flags & VM_LAZY_FREE) { | 563 | if (va->flags & VM_LAZY_FREE) { |
564 | if (va->va_start < *start) | 564 | if (va->va_start < *start) |
565 | *start = va->va_start; | 565 | *start = va->va_start; |
566 | if (va->va_end > *end) | 566 | if (va->va_end > *end) |
567 | *end = va->va_end; | 567 | *end = va->va_end; |
568 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; | 568 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; |
569 | unmap_vmap_area(va); | 569 | unmap_vmap_area(va); |
570 | list_add_tail(&va->purge_list, &valist); | 570 | list_add_tail(&va->purge_list, &valist); |
571 | va->flags |= VM_LAZY_FREEING; | 571 | va->flags |= VM_LAZY_FREEING; |
572 | va->flags &= ~VM_LAZY_FREE; | 572 | va->flags &= ~VM_LAZY_FREE; |
573 | } | 573 | } |
574 | } | 574 | } |
575 | rcu_read_unlock(); | 575 | rcu_read_unlock(); |
576 | 576 | ||
577 | if (nr) | 577 | if (nr) |
578 | atomic_sub(nr, &vmap_lazy_nr); | 578 | atomic_sub(nr, &vmap_lazy_nr); |
579 | 579 | ||
580 | if (nr || force_flush) | 580 | if (nr || force_flush) |
581 | flush_tlb_kernel_range(*start, *end); | 581 | flush_tlb_kernel_range(*start, *end); |
582 | 582 | ||
583 | if (nr) { | 583 | if (nr) { |
584 | spin_lock(&vmap_area_lock); | 584 | spin_lock(&vmap_area_lock); |
585 | list_for_each_entry_safe(va, n_va, &valist, purge_list) | 585 | list_for_each_entry_safe(va, n_va, &valist, purge_list) |
586 | __free_vmap_area(va); | 586 | __free_vmap_area(va); |
587 | spin_unlock(&vmap_area_lock); | 587 | spin_unlock(&vmap_area_lock); |
588 | } | 588 | } |
589 | spin_unlock(&purge_lock); | 589 | spin_unlock(&purge_lock); |
590 | } | 590 | } |
591 | 591 | ||
592 | /* | 592 | /* |
593 | * Kick off a purge of the outstanding lazy areas. Don't bother if somebody | 593 | * Kick off a purge of the outstanding lazy areas. Don't bother if somebody |
594 | * is already purging. | 594 | * is already purging. |
595 | */ | 595 | */ |
596 | static void try_purge_vmap_area_lazy(void) | 596 | static void try_purge_vmap_area_lazy(void) |
597 | { | 597 | { |
598 | unsigned long start = ULONG_MAX, end = 0; | 598 | unsigned long start = ULONG_MAX, end = 0; |
599 | 599 | ||
600 | __purge_vmap_area_lazy(&start, &end, 0, 0); | 600 | __purge_vmap_area_lazy(&start, &end, 0, 0); |
601 | } | 601 | } |
602 | 602 | ||
603 | /* | 603 | /* |
604 | * Kick off a purge of the outstanding lazy areas. | 604 | * Kick off a purge of the outstanding lazy areas. |
605 | */ | 605 | */ |
606 | static void purge_vmap_area_lazy(void) | 606 | static void purge_vmap_area_lazy(void) |
607 | { | 607 | { |
608 | unsigned long start = ULONG_MAX, end = 0; | 608 | unsigned long start = ULONG_MAX, end = 0; |
609 | 609 | ||
610 | __purge_vmap_area_lazy(&start, &end, 1, 0); | 610 | __purge_vmap_area_lazy(&start, &end, 1, 0); |
611 | } | 611 | } |
612 | 612 | ||
613 | /* | 613 | /* |
614 | * Free and unmap a vmap area; the caller must ensure flush_cache_vunmap() | 614 | * Free and unmap a vmap area; the caller must ensure flush_cache_vunmap() |
615 | * has already been called for the correct range. | 615 | * has already been called for the correct range. |
616 | */ | 616 | */ |
617 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | 617 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) |
618 | { | 618 | { |
619 | va->flags |= VM_LAZY_FREE; | 619 | va->flags |= VM_LAZY_FREE; |
620 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | 620 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); |
621 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) | 621 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) |
622 | try_purge_vmap_area_lazy(); | 622 | try_purge_vmap_area_lazy(); |
623 | } | 623 | } |
624 | 624 | ||
625 | /* | 625 | /* |
626 | * Free and unmap a vmap area | 626 | * Free and unmap a vmap area |
627 | */ | 627 | */ |
628 | static void free_unmap_vmap_area(struct vmap_area *va) | 628 | static void free_unmap_vmap_area(struct vmap_area *va) |
629 | { | 629 | { |
630 | flush_cache_vunmap(va->va_start, va->va_end); | 630 | flush_cache_vunmap(va->va_start, va->va_end); |
631 | free_unmap_vmap_area_noflush(va); | 631 | free_unmap_vmap_area_noflush(va); |
632 | } | 632 | } |
633 | 633 | ||
634 | static struct vmap_area *find_vmap_area(unsigned long addr) | 634 | static struct vmap_area *find_vmap_area(unsigned long addr) |
635 | { | 635 | { |
636 | struct vmap_area *va; | 636 | struct vmap_area *va; |
637 | 637 | ||
638 | spin_lock(&vmap_area_lock); | 638 | spin_lock(&vmap_area_lock); |
639 | va = __find_vmap_area(addr); | 639 | va = __find_vmap_area(addr); |
640 | spin_unlock(&vmap_area_lock); | 640 | spin_unlock(&vmap_area_lock); |
641 | 641 | ||
642 | return va; | 642 | return va; |
643 | } | 643 | } |
644 | 644 | ||
645 | static void free_unmap_vmap_area_addr(unsigned long addr) | 645 | static void free_unmap_vmap_area_addr(unsigned long addr) |
646 | { | 646 | { |
647 | struct vmap_area *va; | 647 | struct vmap_area *va; |
648 | 648 | ||
649 | va = find_vmap_area(addr); | 649 | va = find_vmap_area(addr); |
650 | BUG_ON(!va); | 650 | BUG_ON(!va); |
651 | free_unmap_vmap_area(va); | 651 | free_unmap_vmap_area(va); |
652 | } | 652 | } |
653 | 653 | ||
654 | 654 | ||
655 | /*** Per cpu kva allocator ***/ | 655 | /*** Per cpu kva allocator ***/ |
656 | 656 | ||
657 | /* | 657 | /* |
658 | * vmap space is limited especially on 32 bit architectures. Ensure there is | 658 | * vmap space is limited especially on 32 bit architectures. Ensure there is |
659 | * room for at least 16 percpu vmap blocks per CPU. | 659 | * room for at least 16 percpu vmap blocks per CPU. |
660 | */ | 660 | */ |
661 | /* | 661 | /* |
662 | * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able | 662 | * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able |
663 | * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess | 663 | * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess |
664 | * instead (we just need a rough idea) | 664 | * instead (we just need a rough idea) |
665 | */ | 665 | */ |
666 | #if BITS_PER_LONG == 32 | 666 | #if BITS_PER_LONG == 32 |
667 | #define VMALLOC_SPACE (128UL*1024*1024) | 667 | #define VMALLOC_SPACE (128UL*1024*1024) |
668 | #else | 668 | #else |
669 | #define VMALLOC_SPACE (128UL*1024*1024*1024) | 669 | #define VMALLOC_SPACE (128UL*1024*1024*1024) |
670 | #endif | 670 | #endif |
671 | 671 | ||
672 | #define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE) | 672 | #define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE) |
673 | #define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */ | 673 | #define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */ |
674 | #define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */ | 674 | #define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */ |
675 | #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) | 675 | #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) |
676 | #define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ | 676 | #define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ |
677 | #define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ | 677 | #define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ |
678 | #define VMAP_BBMAP_BITS VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ | 678 | #define VMAP_BBMAP_BITS VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ |
679 | VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ | 679 | VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ |
680 | VMALLOC_PAGES / NR_CPUS / 16)) | 680 | VMALLOC_PAGES / NR_CPUS / 16)) |
681 | 681 | ||
682 | #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) | 682 | #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) |
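A worked example of the block sizing above (editor's note; assumes a 64-bit build, 4 KiB pages and a hypothetical NR_CPUS of 64):

/*
 * VMALLOC_PAGES                = 128 GB / 4 KB              = 33554432
 * VMALLOC_PAGES / NR_CPUS / 16 = 33554432 / 64 / 16         = 32768
 * VMAP_BBMAP_BITS              = min(1024, max(128, 32768)) = 1024
 * VMAP_BLOCK_SIZE              = 1024 * 4 KB                = 4 MB per block
 */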
683 | 683 | ||
684 | static bool vmap_initialized __read_mostly = false; | 684 | static bool vmap_initialized __read_mostly = false; |
685 | 685 | ||
686 | struct vmap_block_queue { | 686 | struct vmap_block_queue { |
687 | spinlock_t lock; | 687 | spinlock_t lock; |
688 | struct list_head free; | 688 | struct list_head free; |
689 | }; | 689 | }; |
690 | 690 | ||
691 | struct vmap_block { | 691 | struct vmap_block { |
692 | spinlock_t lock; | 692 | spinlock_t lock; |
693 | struct vmap_area *va; | 693 | struct vmap_area *va; |
694 | struct vmap_block_queue *vbq; | 694 | struct vmap_block_queue *vbq; |
695 | unsigned long free, dirty; | 695 | unsigned long free, dirty; |
696 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); | 696 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); |
697 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); | 697 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); |
698 | struct list_head free_list; | 698 | struct list_head free_list; |
699 | struct rcu_head rcu_head; | 699 | struct rcu_head rcu_head; |
700 | struct list_head purge; | 700 | struct list_head purge; |
701 | }; | 701 | }; |
702 | 702 | ||
703 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ | 703 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ |
704 | static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); | 704 | static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); |
705 | 705 | ||
706 | /* | 706 | /* |
707 | * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block | 707 | * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block |
708 | * in the free path. Could get rid of this if we change the API to return a | 708 | * in the free path. Could get rid of this if we change the API to return a |
709 | * "cookie" from alloc, to be passed to free. But no big deal yet. | 709 | * "cookie" from alloc, to be passed to free. But no big deal yet. |
710 | */ | 710 | */ |
711 | static DEFINE_SPINLOCK(vmap_block_tree_lock); | 711 | static DEFINE_SPINLOCK(vmap_block_tree_lock); |
712 | static RADIX_TREE(vmap_block_tree, GFP_ATOMIC); | 712 | static RADIX_TREE(vmap_block_tree, GFP_ATOMIC); |
713 | 713 | ||
714 | /* | 714 | /* |
715 | * We should probably have a fallback mechanism to allocate virtual memory | 715 | * We should probably have a fallback mechanism to allocate virtual memory |
716 | * out of partially filled vmap blocks. However vmap block sizing should be | 716 | * out of partially filled vmap blocks. However vmap block sizing should be |
717 | * fairly reasonable according to the vmalloc size, so it shouldn't be a | 717 | * fairly reasonable according to the vmalloc size, so it shouldn't be a |
718 | * big problem. | 718 | * big problem. |
719 | */ | 719 | */ |
720 | 720 | ||
721 | static unsigned long addr_to_vb_idx(unsigned long addr) | 721 | static unsigned long addr_to_vb_idx(unsigned long addr) |
722 | { | 722 | { |
723 | addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); | 723 | addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); |
724 | addr /= VMAP_BLOCK_SIZE; | 724 | addr /= VMAP_BLOCK_SIZE; |
725 | return addr; | 725 | return addr; |
726 | } | 726 | } |
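A small worked example for addr_to_vb_idx() (editor's note; the VMALLOC_START value is a typical x86_64 placement, used purely for illustration):

/*
 * With VMALLOC_START == 0xffffc90000000000 (already VMAP_BLOCK_SIZE aligned)
 * and a 4 MB VMAP_BLOCK_SIZE, an address at VMALLOC_START + 0x900000 yields
 * index 0x900000 / 0x400000 == 2, i.e. the third possible block slot.
 */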
727 | 727 | ||
728 | static struct vmap_block *new_vmap_block(gfp_t gfp_mask) | 728 | static struct vmap_block *new_vmap_block(gfp_t gfp_mask) |
729 | { | 729 | { |
730 | struct vmap_block_queue *vbq; | 730 | struct vmap_block_queue *vbq; |
731 | struct vmap_block *vb; | 731 | struct vmap_block *vb; |
732 | struct vmap_area *va; | 732 | struct vmap_area *va; |
733 | unsigned long vb_idx; | 733 | unsigned long vb_idx; |
734 | int node, err; | 734 | int node, err; |
735 | 735 | ||
736 | node = numa_node_id(); | 736 | node = numa_node_id(); |
737 | 737 | ||
738 | vb = kmalloc_node(sizeof(struct vmap_block), | 738 | vb = kmalloc_node(sizeof(struct vmap_block), |
739 | gfp_mask & GFP_RECLAIM_MASK, node); | 739 | gfp_mask & GFP_RECLAIM_MASK, node); |
740 | if (unlikely(!vb)) | 740 | if (unlikely(!vb)) |
741 | return ERR_PTR(-ENOMEM); | 741 | return ERR_PTR(-ENOMEM); |
742 | 742 | ||
743 | va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, | 743 | va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, |
744 | VMALLOC_START, VMALLOC_END, | 744 | VMALLOC_START, VMALLOC_END, |
745 | node, gfp_mask); | 745 | node, gfp_mask); |
746 | if (unlikely(IS_ERR(va))) { | 746 | if (unlikely(IS_ERR(va))) { |
747 | kfree(vb); | 747 | kfree(vb); |
748 | return ERR_CAST(va); | 748 | return ERR_CAST(va); |
749 | } | 749 | } |
750 | 750 | ||
751 | err = radix_tree_preload(gfp_mask); | 751 | err = radix_tree_preload(gfp_mask); |
752 | if (unlikely(err)) { | 752 | if (unlikely(err)) { |
753 | kfree(vb); | 753 | kfree(vb); |
754 | free_vmap_area(va); | 754 | free_vmap_area(va); |
755 | return ERR_PTR(err); | 755 | return ERR_PTR(err); |
756 | } | 756 | } |
757 | 757 | ||
758 | spin_lock_init(&vb->lock); | 758 | spin_lock_init(&vb->lock); |
759 | vb->va = va; | 759 | vb->va = va; |
760 | vb->free = VMAP_BBMAP_BITS; | 760 | vb->free = VMAP_BBMAP_BITS; |
761 | vb->dirty = 0; | 761 | vb->dirty = 0; |
762 | bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); | 762 | bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); |
763 | bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); | 763 | bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); |
764 | INIT_LIST_HEAD(&vb->free_list); | 764 | INIT_LIST_HEAD(&vb->free_list); |
765 | 765 | ||
766 | vb_idx = addr_to_vb_idx(va->va_start); | 766 | vb_idx = addr_to_vb_idx(va->va_start); |
767 | spin_lock(&vmap_block_tree_lock); | 767 | spin_lock(&vmap_block_tree_lock); |
768 | err = radix_tree_insert(&vmap_block_tree, vb_idx, vb); | 768 | err = radix_tree_insert(&vmap_block_tree, vb_idx, vb); |
769 | spin_unlock(&vmap_block_tree_lock); | 769 | spin_unlock(&vmap_block_tree_lock); |
770 | BUG_ON(err); | 770 | BUG_ON(err); |
771 | radix_tree_preload_end(); | 771 | radix_tree_preload_end(); |
772 | 772 | ||
773 | vbq = &get_cpu_var(vmap_block_queue); | 773 | vbq = &get_cpu_var(vmap_block_queue); |
774 | vb->vbq = vbq; | 774 | vb->vbq = vbq; |
775 | spin_lock(&vbq->lock); | 775 | spin_lock(&vbq->lock); |
776 | list_add_rcu(&vb->free_list, &vbq->free); | 776 | list_add_rcu(&vb->free_list, &vbq->free); |
777 | spin_unlock(&vbq->lock); | 777 | spin_unlock(&vbq->lock); |
778 | put_cpu_var(vmap_block_queue); | 778 | put_cpu_var(vmap_block_queue); |
779 | 779 | ||
780 | return vb; | 780 | return vb; |
781 | } | 781 | } |
782 | 782 | ||
783 | static void rcu_free_vb(struct rcu_head *head) | 783 | static void rcu_free_vb(struct rcu_head *head) |
784 | { | 784 | { |
785 | struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head); | 785 | struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head); |
786 | 786 | ||
787 | kfree(vb); | 787 | kfree(vb); |
788 | } | 788 | } |
789 | 789 | ||
790 | static void free_vmap_block(struct vmap_block *vb) | 790 | static void free_vmap_block(struct vmap_block *vb) |
791 | { | 791 | { |
792 | struct vmap_block *tmp; | 792 | struct vmap_block *tmp; |
793 | unsigned long vb_idx; | 793 | unsigned long vb_idx; |
794 | 794 | ||
795 | vb_idx = addr_to_vb_idx(vb->va->va_start); | 795 | vb_idx = addr_to_vb_idx(vb->va->va_start); |
796 | spin_lock(&vmap_block_tree_lock); | 796 | spin_lock(&vmap_block_tree_lock); |
797 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); | 797 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); |
798 | spin_unlock(&vmap_block_tree_lock); | 798 | spin_unlock(&vmap_block_tree_lock); |
799 | BUG_ON(tmp != vb); | 799 | BUG_ON(tmp != vb); |
800 | 800 | ||
801 | free_unmap_vmap_area_noflush(vb->va); | 801 | free_unmap_vmap_area_noflush(vb->va); |
802 | call_rcu(&vb->rcu_head, rcu_free_vb); | 802 | call_rcu(&vb->rcu_head, rcu_free_vb); |
803 | } | 803 | } |
804 | 804 | ||
805 | static void purge_fragmented_blocks(int cpu) | 805 | static void purge_fragmented_blocks(int cpu) |
806 | { | 806 | { |
807 | LIST_HEAD(purge); | 807 | LIST_HEAD(purge); |
808 | struct vmap_block *vb; | 808 | struct vmap_block *vb; |
809 | struct vmap_block *n_vb; | 809 | struct vmap_block *n_vb; |
810 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | 810 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); |
811 | 811 | ||
812 | rcu_read_lock(); | 812 | rcu_read_lock(); |
813 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | 813 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { |
814 | 814 | ||
815 | if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) | 815 | if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) |
816 | continue; | 816 | continue; |
817 | 817 | ||
818 | spin_lock(&vb->lock); | 818 | spin_lock(&vb->lock); |
819 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { | 819 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { |
820 | vb->free = 0; /* prevent further allocs after releasing lock */ | 820 | vb->free = 0; /* prevent further allocs after releasing lock */ |
821 | vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ | 821 | vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ |
822 | bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); | 822 | bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); |
823 | bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); | 823 | bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); |
824 | spin_lock(&vbq->lock); | 824 | spin_lock(&vbq->lock); |
825 | list_del_rcu(&vb->free_list); | 825 | list_del_rcu(&vb->free_list); |
826 | spin_unlock(&vbq->lock); | 826 | spin_unlock(&vbq->lock); |
827 | spin_unlock(&vb->lock); | 827 | spin_unlock(&vb->lock); |
828 | list_add_tail(&vb->purge, &purge); | 828 | list_add_tail(&vb->purge, &purge); |
829 | } else | 829 | } else |
830 | spin_unlock(&vb->lock); | 830 | spin_unlock(&vb->lock); |
831 | } | 831 | } |
832 | rcu_read_unlock(); | 832 | rcu_read_unlock(); |
833 | 833 | ||
834 | list_for_each_entry_safe(vb, n_vb, &purge, purge) { | 834 | list_for_each_entry_safe(vb, n_vb, &purge, purge) { |
835 | list_del(&vb->purge); | 835 | list_del(&vb->purge); |
836 | free_vmap_block(vb); | 836 | free_vmap_block(vb); |
837 | } | 837 | } |
838 | } | 838 | } |
839 | 839 | ||
840 | static void purge_fragmented_blocks_thiscpu(void) | 840 | static void purge_fragmented_blocks_thiscpu(void) |
841 | { | 841 | { |
842 | purge_fragmented_blocks(smp_processor_id()); | 842 | purge_fragmented_blocks(smp_processor_id()); |
843 | } | 843 | } |
844 | 844 | ||
845 | static void purge_fragmented_blocks_allcpus(void) | 845 | static void purge_fragmented_blocks_allcpus(void) |
846 | { | 846 | { |
847 | int cpu; | 847 | int cpu; |
848 | 848 | ||
849 | for_each_possible_cpu(cpu) | 849 | for_each_possible_cpu(cpu) |
850 | purge_fragmented_blocks(cpu); | 850 | purge_fragmented_blocks(cpu); |
851 | } | 851 | } |
852 | 852 | ||
853 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | 853 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) |
854 | { | 854 | { |
855 | struct vmap_block_queue *vbq; | 855 | struct vmap_block_queue *vbq; |
856 | struct vmap_block *vb; | 856 | struct vmap_block *vb; |
857 | unsigned long addr = 0; | 857 | unsigned long addr = 0; |
858 | unsigned int order; | 858 | unsigned int order; |
859 | int purge = 0; | 859 | int purge = 0; |
860 | 860 | ||
861 | BUG_ON(size & ~PAGE_MASK); | 861 | BUG_ON(size & ~PAGE_MASK); |
862 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | 862 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); |
863 | order = get_order(size); | 863 | order = get_order(size); |
864 | 864 | ||
865 | again: | 865 | again: |
866 | rcu_read_lock(); | 866 | rcu_read_lock(); |
867 | vbq = &get_cpu_var(vmap_block_queue); | 867 | vbq = &get_cpu_var(vmap_block_queue); |
868 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | 868 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { |
869 | int i; | 869 | int i; |
870 | 870 | ||
871 | spin_lock(&vb->lock); | 871 | spin_lock(&vb->lock); |
872 | if (vb->free < 1UL << order) | 872 | if (vb->free < 1UL << order) |
873 | goto next; | 873 | goto next; |
874 | 874 | ||
875 | i = bitmap_find_free_region(vb->alloc_map, | 875 | i = bitmap_find_free_region(vb->alloc_map, |
876 | VMAP_BBMAP_BITS, order); | 876 | VMAP_BBMAP_BITS, order); |
877 | 877 | ||
878 | if (i < 0) { | 878 | if (i < 0) { |
879 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { | 879 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { |
880 | /* fragmented and no outstanding allocations */ | 880 | /* fragmented and no outstanding allocations */ |
881 | BUG_ON(vb->dirty != VMAP_BBMAP_BITS); | 881 | BUG_ON(vb->dirty != VMAP_BBMAP_BITS); |
882 | purge = 1; | 882 | purge = 1; |
883 | } | 883 | } |
884 | goto next; | 884 | goto next; |
885 | } | 885 | } |
886 | addr = vb->va->va_start + (i << PAGE_SHIFT); | 886 | addr = vb->va->va_start + (i << PAGE_SHIFT); |
887 | BUG_ON(addr_to_vb_idx(addr) != | 887 | BUG_ON(addr_to_vb_idx(addr) != |
888 | addr_to_vb_idx(vb->va->va_start)); | 888 | addr_to_vb_idx(vb->va->va_start)); |
889 | vb->free -= 1UL << order; | 889 | vb->free -= 1UL << order; |
890 | if (vb->free == 0) { | 890 | if (vb->free == 0) { |
891 | spin_lock(&vbq->lock); | 891 | spin_lock(&vbq->lock); |
892 | list_del_rcu(&vb->free_list); | 892 | list_del_rcu(&vb->free_list); |
893 | spin_unlock(&vbq->lock); | 893 | spin_unlock(&vbq->lock); |
894 | } | 894 | } |
895 | spin_unlock(&vb->lock); | 895 | spin_unlock(&vb->lock); |
896 | break; | 896 | break; |
897 | next: | 897 | next: |
898 | spin_unlock(&vb->lock); | 898 | spin_unlock(&vb->lock); |
899 | } | 899 | } |
900 | 900 | ||
901 | if (purge) | 901 | if (purge) |
902 | purge_fragmented_blocks_thiscpu(); | 902 | purge_fragmented_blocks_thiscpu(); |
903 | 903 | ||
904 | put_cpu_var(vmap_block_queue); | 904 | put_cpu_var(vmap_block_queue); |
905 | rcu_read_unlock(); | 905 | rcu_read_unlock(); |
906 | 906 | ||
907 | if (!addr) { | 907 | if (!addr) { |
908 | vb = new_vmap_block(gfp_mask); | 908 | vb = new_vmap_block(gfp_mask); |
909 | if (IS_ERR(vb)) | 909 | if (IS_ERR(vb)) |
910 | return vb; | 910 | return vb; |
911 | goto again; | 911 | goto again; |
912 | } | 912 | } |
913 | 913 | ||
914 | return (void *)addr; | 914 | return (void *)addr; |
915 | } | 915 | } |
916 | 916 | ||
917 | static void vb_free(const void *addr, unsigned long size) | 917 | static void vb_free(const void *addr, unsigned long size) |
918 | { | 918 | { |
919 | unsigned long offset; | 919 | unsigned long offset; |
920 | unsigned long vb_idx; | 920 | unsigned long vb_idx; |
921 | unsigned int order; | 921 | unsigned int order; |
922 | struct vmap_block *vb; | 922 | struct vmap_block *vb; |
923 | 923 | ||
924 | BUG_ON(size & ~PAGE_MASK); | 924 | BUG_ON(size & ~PAGE_MASK); |
925 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | 925 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); |
926 | 926 | ||
927 | flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); | 927 | flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); |
928 | 928 | ||
929 | order = get_order(size); | 929 | order = get_order(size); |
930 | 930 | ||
931 | offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); | 931 | offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); |
932 | 932 | ||
933 | vb_idx = addr_to_vb_idx((unsigned long)addr); | 933 | vb_idx = addr_to_vb_idx((unsigned long)addr); |
934 | rcu_read_lock(); | 934 | rcu_read_lock(); |
935 | vb = radix_tree_lookup(&vmap_block_tree, vb_idx); | 935 | vb = radix_tree_lookup(&vmap_block_tree, vb_idx); |
936 | rcu_read_unlock(); | 936 | rcu_read_unlock(); |
937 | BUG_ON(!vb); | 937 | BUG_ON(!vb); |
938 | 938 | ||
939 | spin_lock(&vb->lock); | 939 | spin_lock(&vb->lock); |
940 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); | 940 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); |
941 | 941 | ||
942 | vb->dirty += 1UL << order; | 942 | vb->dirty += 1UL << order; |
943 | if (vb->dirty == VMAP_BBMAP_BITS) { | 943 | if (vb->dirty == VMAP_BBMAP_BITS) { |
944 | BUG_ON(vb->free); | 944 | BUG_ON(vb->free); |
945 | spin_unlock(&vb->lock); | 945 | spin_unlock(&vb->lock); |
946 | free_vmap_block(vb); | 946 | free_vmap_block(vb); |
947 | } else | 947 | } else |
948 | spin_unlock(&vb->lock); | 948 | spin_unlock(&vb->lock); |
949 | } | 949 | } |
950 | 950 | ||
951 | /** | 951 | /** |
952 | * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer | 952 | * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer |
953 | * | 953 | * |
954 | * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily | 954 | * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily |
955 | * to amortize TLB flushing overheads. What this means is that any page you | 955 | * to amortize TLB flushing overheads. What this means is that any page you |
956 | * have now may, in a former life, have been mapped into a kernel virtual | 956 | * have now may, in a former life, have been mapped into a kernel virtual |
957 | * address by the vmap layer, and so there might be some CPUs with TLB entries | 957 | * address by the vmap layer, and so there might be some CPUs with TLB entries |
958 | * still referencing that page (in addition to the regular 1:1 kernel mapping). | 958 | * still referencing that page (in addition to the regular 1:1 kernel mapping). |
959 | * | 959 | * |
960 | * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can | 960 | * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can |
961 | * be sure that none of the pages we have control over will have any aliases | 961 | * be sure that none of the pages we have control over will have any aliases |
962 | * from the vmap layer. | 962 | * from the vmap layer. |
963 | */ | 963 | */ |
964 | void vm_unmap_aliases(void) | 964 | void vm_unmap_aliases(void) |
965 | { | 965 | { |
966 | unsigned long start = ULONG_MAX, end = 0; | 966 | unsigned long start = ULONG_MAX, end = 0; |
967 | int cpu; | 967 | int cpu; |
968 | int flush = 0; | 968 | int flush = 0; |
969 | 969 | ||
970 | if (unlikely(!vmap_initialized)) | 970 | if (unlikely(!vmap_initialized)) |
971 | return; | 971 | return; |
972 | 972 | ||
973 | for_each_possible_cpu(cpu) { | 973 | for_each_possible_cpu(cpu) { |
974 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | 974 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); |
975 | struct vmap_block *vb; | 975 | struct vmap_block *vb; |
976 | 976 | ||
977 | rcu_read_lock(); | 977 | rcu_read_lock(); |
978 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | 978 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { |
979 | int i; | 979 | int i; |
980 | 980 | ||
981 | spin_lock(&vb->lock); | 981 | spin_lock(&vb->lock); |
982 | i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); | 982 | i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); |
983 | while (i < VMAP_BBMAP_BITS) { | 983 | while (i < VMAP_BBMAP_BITS) { |
984 | unsigned long s, e; | 984 | unsigned long s, e; |
985 | int j; | 985 | int j; |
986 | j = find_next_zero_bit(vb->dirty_map, | 986 | j = find_next_zero_bit(vb->dirty_map, |
987 | VMAP_BBMAP_BITS, i); | 987 | VMAP_BBMAP_BITS, i); |
988 | 988 | ||
989 | s = vb->va->va_start + (i << PAGE_SHIFT); | 989 | s = vb->va->va_start + (i << PAGE_SHIFT); |
990 | e = vb->va->va_start + (j << PAGE_SHIFT); | 990 | e = vb->va->va_start + (j << PAGE_SHIFT); |
991 | vunmap_page_range(s, e); | 991 | vunmap_page_range(s, e); |
992 | flush = 1; | 992 | flush = 1; |
993 | 993 | ||
994 | if (s < start) | 994 | if (s < start) |
995 | start = s; | 995 | start = s; |
996 | if (e > end) | 996 | if (e > end) |
997 | end = e; | 997 | end = e; |
998 | 998 | ||
999 | i = j; | 999 | i = j; |
1000 | i = find_next_bit(vb->dirty_map, | 1000 | i = find_next_bit(vb->dirty_map, |
1001 | VMAP_BBMAP_BITS, i); | 1001 | VMAP_BBMAP_BITS, i); |
1002 | } | 1002 | } |
1003 | spin_unlock(&vb->lock); | 1003 | spin_unlock(&vb->lock); |
1004 | } | 1004 | } |
1005 | rcu_read_unlock(); | 1005 | rcu_read_unlock(); |
1006 | } | 1006 | } |
1007 | 1007 | ||
1008 | __purge_vmap_area_lazy(&start, &end, 1, flush); | 1008 | __purge_vmap_area_lazy(&start, &end, 1, flush); |
1009 | } | 1009 | } |
1010 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); | 1010 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); |
1011 | 1011 | ||
1012 | /** | 1012 | /** |
1013 | * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram | 1013 | * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram |
1014 | * @mem: the pointer returned by vm_map_ram | 1014 | * @mem: the pointer returned by vm_map_ram |
1015 | * @count: the count passed to that vm_map_ram call (cannot unmap partial) | 1015 | * @count: the count passed to that vm_map_ram call (cannot unmap partial) |
1016 | */ | 1016 | */ |
1017 | void vm_unmap_ram(const void *mem, unsigned int count) | 1017 | void vm_unmap_ram(const void *mem, unsigned int count) |
1018 | { | 1018 | { |
1019 | unsigned long size = count << PAGE_SHIFT; | 1019 | unsigned long size = count << PAGE_SHIFT; |
1020 | unsigned long addr = (unsigned long)mem; | 1020 | unsigned long addr = (unsigned long)mem; |
1021 | 1021 | ||
1022 | BUG_ON(!addr); | 1022 | BUG_ON(!addr); |
1023 | BUG_ON(addr < VMALLOC_START); | 1023 | BUG_ON(addr < VMALLOC_START); |
1024 | BUG_ON(addr > VMALLOC_END); | 1024 | BUG_ON(addr > VMALLOC_END); |
1025 | BUG_ON(addr & (PAGE_SIZE-1)); | 1025 | BUG_ON(addr & (PAGE_SIZE-1)); |
1026 | 1026 | ||
1027 | debug_check_no_locks_freed(mem, size); | 1027 | debug_check_no_locks_freed(mem, size); |
1028 | vmap_debug_free_range(addr, addr+size); | 1028 | vmap_debug_free_range(addr, addr+size); |
1029 | 1029 | ||
1030 | if (likely(count <= VMAP_MAX_ALLOC)) | 1030 | if (likely(count <= VMAP_MAX_ALLOC)) |
1031 | vb_free(mem, size); | 1031 | vb_free(mem, size); |
1032 | else | 1032 | else |
1033 | free_unmap_vmap_area_addr(addr); | 1033 | free_unmap_vmap_area_addr(addr); |
1034 | } | 1034 | } |
1035 | EXPORT_SYMBOL(vm_unmap_ram); | 1035 | EXPORT_SYMBOL(vm_unmap_ram); |
1036 | 1036 | ||
1037 | /** | 1037 | /** |
1038 | * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) | 1038 | * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) |
1039 | * @pages: an array of pointers to the pages to be mapped | 1039 | * @pages: an array of pointers to the pages to be mapped |
1040 | * @count: number of pages | 1040 | * @count: number of pages |
1041 | * @node: prefer to allocate data structures on this node | 1041 | * @node: prefer to allocate data structures on this node |
1042 | * @prot: memory protection to use. PAGE_KERNEL for regular RAM | 1042 | * @prot: memory protection to use. PAGE_KERNEL for regular RAM |
1043 | * | 1043 | * |
1044 | * Returns: a pointer to the address that has been mapped, or %NULL on failure | 1044 | * Returns: a pointer to the address that has been mapped, or %NULL on failure |
1045 | */ | 1045 | */ |
1046 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) | 1046 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) |
1047 | { | 1047 | { |
1048 | unsigned long size = count << PAGE_SHIFT; | 1048 | unsigned long size = count << PAGE_SHIFT; |
1049 | unsigned long addr; | 1049 | unsigned long addr; |
1050 | void *mem; | 1050 | void *mem; |
1051 | 1051 | ||
1052 | if (likely(count <= VMAP_MAX_ALLOC)) { | 1052 | if (likely(count <= VMAP_MAX_ALLOC)) { |
1053 | mem = vb_alloc(size, GFP_KERNEL); | 1053 | mem = vb_alloc(size, GFP_KERNEL); |
1054 | if (IS_ERR(mem)) | 1054 | if (IS_ERR(mem)) |
1055 | return NULL; | 1055 | return NULL; |
1056 | addr = (unsigned long)mem; | 1056 | addr = (unsigned long)mem; |
1057 | } else { | 1057 | } else { |
1058 | struct vmap_area *va; | 1058 | struct vmap_area *va; |
1059 | va = alloc_vmap_area(size, PAGE_SIZE, | 1059 | va = alloc_vmap_area(size, PAGE_SIZE, |
1060 | VMALLOC_START, VMALLOC_END, node, GFP_KERNEL); | 1060 | VMALLOC_START, VMALLOC_END, node, GFP_KERNEL); |
1061 | if (IS_ERR(va)) | 1061 | if (IS_ERR(va)) |
1062 | return NULL; | 1062 | return NULL; |
1063 | 1063 | ||
1064 | addr = va->va_start; | 1064 | addr = va->va_start; |
1065 | mem = (void *)addr; | 1065 | mem = (void *)addr; |
1066 | } | 1066 | } |
1067 | if (vmap_page_range(addr, addr + size, prot, pages) < 0) { | 1067 | if (vmap_page_range(addr, addr + size, prot, pages) < 0) { |
1068 | vm_unmap_ram(mem, count); | 1068 | vm_unmap_ram(mem, count); |
1069 | return NULL; | 1069 | return NULL; |
1070 | } | 1070 | } |
1071 | return mem; | 1071 | return mem; |
1072 | } | 1072 | } |
1073 | EXPORT_SYMBOL(vm_map_ram); | 1073 | EXPORT_SYMBOL(vm_map_ram); |
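A hedged usage sketch of the vm_map_ram()/vm_unmap_ram() pair (editor's illustration; the helper names and the fixed count of two pages are hypothetical):

static void *map_two_pages(struct page *pages[2])
{
	/* count <= VMAP_MAX_ALLOC, so this takes the per-CPU vmap block path */
	return vm_map_ram(pages, 2, -1, PAGE_KERNEL);
}

static void unmap_two_pages(void *mem)
{
	/* count must match the vm_map_ram() call; partial unmaps are not allowed */
	vm_unmap_ram(mem, 2);
}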
1074 | 1074 | ||
1075 | /** | 1075 | /** |
1076 | * vm_area_register_early - register vmap area early during boot | 1076 | * vm_area_register_early - register vmap area early during boot |
1077 | * @vm: vm_struct to register | 1077 | * @vm: vm_struct to register |
1078 | * @align: requested alignment | 1078 | * @align: requested alignment |
1079 | * | 1079 | * |
1080 | * This function is used to register kernel vm area before | 1080 | * This function is used to register kernel vm area before |
1081 | * vmalloc_init() is called. @vm->size and @vm->flags should contain | 1081 | * vmalloc_init() is called. @vm->size and @vm->flags should contain |
1082 | * proper values on entry and other fields should be zero. On return, | 1082 | * proper values on entry and other fields should be zero. On return, |
1083 | * vm->addr contains the allocated address. | 1083 | * vm->addr contains the allocated address. |
1084 | * | 1084 | * |
1085 | * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. | 1085 | * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. |
1086 | */ | 1086 | */ |
1087 | void __init vm_area_register_early(struct vm_struct *vm, size_t align) | 1087 | void __init vm_area_register_early(struct vm_struct *vm, size_t align) |
1088 | { | 1088 | { |
1089 | static size_t vm_init_off __initdata; | 1089 | static size_t vm_init_off __initdata; |
1090 | unsigned long addr; | 1090 | unsigned long addr; |
1091 | 1091 | ||
1092 | addr = ALIGN(VMALLOC_START + vm_init_off, align); | 1092 | addr = ALIGN(VMALLOC_START + vm_init_off, align); |
1093 | vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; | 1093 | vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; |
1094 | 1094 | ||
1095 | vm->addr = (void *)addr; | 1095 | vm->addr = (void *)addr; |
1096 | 1096 | ||
1097 | vm->next = vmlist; | 1097 | vm->next = vmlist; |
1098 | vmlist = vm; | 1098 | vmlist = vm; |
1099 | } | 1099 | } |
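A minimal sketch of early registration (editor's illustration; the caller, the size and the VM_ALLOC flag are assumptions), valid only before vmalloc_init() runs:

static struct vm_struct early_vm;

void __init reserve_early_vm_range(void)
{
	early_vm.flags = VM_ALLOC;
	early_vm.size = 8 * PAGE_SIZE;
	vm_area_register_early(&early_vm, PAGE_SIZE);
	/* early_vm.addr now holds the reserved kernel virtual address */
}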
1100 | 1100 | ||
1101 | void __init vmalloc_init(void) | 1101 | void __init vmalloc_init(void) |
1102 | { | 1102 | { |
1103 | struct vmap_area *va; | 1103 | struct vmap_area *va; |
1104 | struct vm_struct *tmp; | 1104 | struct vm_struct *tmp; |
1105 | int i; | 1105 | int i; |
1106 | 1106 | ||
1107 | for_each_possible_cpu(i) { | 1107 | for_each_possible_cpu(i) { |
1108 | struct vmap_block_queue *vbq; | 1108 | struct vmap_block_queue *vbq; |
1109 | 1109 | ||
1110 | vbq = &per_cpu(vmap_block_queue, i); | 1110 | vbq = &per_cpu(vmap_block_queue, i); |
1111 | spin_lock_init(&vbq->lock); | 1111 | spin_lock_init(&vbq->lock); |
1112 | INIT_LIST_HEAD(&vbq->free); | 1112 | INIT_LIST_HEAD(&vbq->free); |
1113 | } | 1113 | } |
1114 | 1114 | ||
1115 | /* Import existing vmlist entries. */ | 1115 | /* Import existing vmlist entries. */ |
1116 | for (tmp = vmlist; tmp; tmp = tmp->next) { | 1116 | for (tmp = vmlist; tmp; tmp = tmp->next) { |
1117 | va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); | 1117 | va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); |
1118 | va->flags = tmp->flags | VM_VM_AREA; | 1118 | va->flags = tmp->flags | VM_VM_AREA; |
1119 | va->va_start = (unsigned long)tmp->addr; | 1119 | va->va_start = (unsigned long)tmp->addr; |
1120 | va->va_end = va->va_start + tmp->size; | 1120 | va->va_end = va->va_start + tmp->size; |
1121 | __insert_vmap_area(va); | 1121 | __insert_vmap_area(va); |
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | vmap_area_pcpu_hole = VMALLOC_END; | 1124 | vmap_area_pcpu_hole = VMALLOC_END; |
1125 | 1125 | ||
1126 | vmap_initialized = true; | 1126 | vmap_initialized = true; |
1127 | } | 1127 | } |
1128 | 1128 | ||
1129 | /** | 1129 | /** |
1130 | * map_kernel_range_noflush - map kernel VM area with the specified pages | 1130 | * map_kernel_range_noflush - map kernel VM area with the specified pages |
1131 | * @addr: start of the VM area to map | 1131 | * @addr: start of the VM area to map |
1132 | * @size: size of the VM area to map | 1132 | * @size: size of the VM area to map |
1133 | * @prot: page protection flags to use | 1133 | * @prot: page protection flags to use |
1134 | * @pages: pages to map | 1134 | * @pages: pages to map |
1135 | * | 1135 | * |
1136 | * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size | 1136 | * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size |
1137 | * specify should have been allocated using get_vm_area() and its | 1137 | * specify should have been allocated using get_vm_area() and its |
1138 | * friends. | 1138 | * friends. |
1139 | * | 1139 | * |
1140 | * NOTE: | 1140 | * NOTE: |
1141 | * This function does NOT do any cache flushing. The caller is | 1141 | * This function does NOT do any cache flushing. The caller is |
1142 | * responsible for calling flush_cache_vmap() on to-be-mapped areas | 1142 | * responsible for calling flush_cache_vmap() on to-be-mapped areas |
1143 | * before calling this function. | 1143 | * before calling this function. |
1144 | * | 1144 | * |
1145 | * RETURNS: | 1145 | * RETURNS: |
1146 | * The number of pages mapped on success, -errno on failure. | 1146 | * The number of pages mapped on success, -errno on failure. |
1147 | */ | 1147 | */ |
1148 | int map_kernel_range_noflush(unsigned long addr, unsigned long size, | 1148 | int map_kernel_range_noflush(unsigned long addr, unsigned long size, |
1149 | pgprot_t prot, struct page **pages) | 1149 | pgprot_t prot, struct page **pages) |
1150 | { | 1150 | { |
1151 | return vmap_page_range_noflush(addr, addr + size, prot, pages); | 1151 | return vmap_page_range_noflush(addr, addr + size, prot, pages); |
1152 | } | 1152 | } |
1153 | 1153 | ||
1154 | /** | 1154 | /** |
1155 | * unmap_kernel_range_noflush - unmap kernel VM area | 1155 | * unmap_kernel_range_noflush - unmap kernel VM area |
1156 | * @addr: start of the VM area to unmap | 1156 | * @addr: start of the VM area to unmap |
1157 | * @size: size of the VM area to unmap | 1157 | * @size: size of the VM area to unmap |
1158 | * | 1158 | * |
1159 | * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size | 1159 | * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size |
1160 | * specify should have been allocated using get_vm_area() and its | 1160 | * specify should have been allocated using get_vm_area() and its |
1161 | * friends. | 1161 | * friends. |
1162 | * | 1162 | * |
1163 | * NOTE: | 1163 | * NOTE: |
1164 | * This function does NOT do any cache flushing. The caller is | 1164 | * This function does NOT do any cache flushing. The caller is |
1165 | * responsible for calling flush_cache_vunmap() on to-be-unmapped areas | 1165 | * responsible for calling flush_cache_vunmap() on to-be-unmapped areas |
1166 | * before calling this function and flush_tlb_kernel_range() after. | 1166 | * before calling this function and flush_tlb_kernel_range() after. |
1167 | */ | 1167 | */ |
1168 | void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) | 1168 | void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) |
1169 | { | 1169 | { |
1170 | vunmap_page_range(addr, addr + size); | 1170 | vunmap_page_range(addr, addr + size); |
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | /** | 1173 | /** |
1174 | * unmap_kernel_range - unmap kernel VM area and flush cache and TLB | 1174 | * unmap_kernel_range - unmap kernel VM area and flush cache and TLB |
1175 | * @addr: start of the VM area to unmap | 1175 | * @addr: start of the VM area to unmap |
1176 | * @size: size of the VM area to unmap | 1176 | * @size: size of the VM area to unmap |
1177 | * | 1177 | * |
1178 | * Similar to unmap_kernel_range_noflush() but flushes the cache before | 1178 | * Similar to unmap_kernel_range_noflush() but flushes the cache before |
1179 | * the unmapping and the TLB after. | 1179 | * the unmapping and the TLB after. |
1180 | */ | 1180 | */ |
1181 | void unmap_kernel_range(unsigned long addr, unsigned long size) | 1181 | void unmap_kernel_range(unsigned long addr, unsigned long size) |
1182 | { | 1182 | { |
1183 | unsigned long end = addr + size; | 1183 | unsigned long end = addr + size; |
1184 | 1184 | ||
1185 | flush_cache_vunmap(addr, end); | 1185 | flush_cache_vunmap(addr, end); |
1186 | vunmap_page_range(addr, end); | 1186 | vunmap_page_range(addr, end); |
1187 | flush_tlb_kernel_range(addr, end); | 1187 | flush_tlb_kernel_range(addr, end); |
1188 | } | 1188 | } |
1189 | 1189 | ||
1190 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | 1190 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) |
1191 | { | 1191 | { |
1192 | unsigned long addr = (unsigned long)area->addr; | 1192 | unsigned long addr = (unsigned long)area->addr; |
1193 | unsigned long end = addr + area->size - PAGE_SIZE; | 1193 | unsigned long end = addr + area->size - PAGE_SIZE; |
1194 | int err; | 1194 | int err; |
1195 | 1195 | ||
1196 | err = vmap_page_range(addr, end, prot, *pages); | 1196 | err = vmap_page_range(addr, end, prot, *pages); |
1197 | if (err > 0) { | 1197 | if (err > 0) { |
1198 | *pages += err; | 1198 | *pages += err; |
1199 | err = 0; | 1199 | err = 0; |
1200 | } | 1200 | } |
1201 | 1201 | ||
1202 | return err; | 1202 | return err; |
1203 | } | 1203 | } |
1204 | EXPORT_SYMBOL_GPL(map_vm_area); | 1204 | EXPORT_SYMBOL_GPL(map_vm_area); |
1205 | 1205 | ||
1206 | /*** Old vmalloc interfaces ***/ | 1206 | /*** Old vmalloc interfaces ***/ |
1207 | DEFINE_RWLOCK(vmlist_lock); | 1207 | DEFINE_RWLOCK(vmlist_lock); |
1208 | struct vm_struct *vmlist; | 1208 | struct vm_struct *vmlist; |
1209 | 1209 | ||
1210 | static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, | 1210 | static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, |
1211 | unsigned long flags, void *caller) | 1211 | unsigned long flags, void *caller) |
1212 | { | 1212 | { |
1213 | struct vm_struct *tmp, **p; | 1213 | struct vm_struct *tmp, **p; |
1214 | 1214 | ||
1215 | vm->flags = flags; | 1215 | vm->flags = flags; |
1216 | vm->addr = (void *)va->va_start; | 1216 | vm->addr = (void *)va->va_start; |
1217 | vm->size = va->va_end - va->va_start; | 1217 | vm->size = va->va_end - va->va_start; |
1218 | vm->caller = caller; | 1218 | vm->caller = caller; |
1219 | va->private = vm; | 1219 | va->private = vm; |
1220 | va->flags |= VM_VM_AREA; | 1220 | va->flags |= VM_VM_AREA; |
1221 | 1221 | ||
1222 | write_lock(&vmlist_lock); | 1222 | write_lock(&vmlist_lock); |
1223 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { | 1223 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { |
1224 | if (tmp->addr >= vm->addr) | 1224 | if (tmp->addr >= vm->addr) |
1225 | break; | 1225 | break; |
1226 | } | 1226 | } |
1227 | vm->next = *p; | 1227 | vm->next = *p; |
1228 | *p = vm; | 1228 | *p = vm; |
1229 | write_unlock(&vmlist_lock); | 1229 | write_unlock(&vmlist_lock); |
1230 | } | 1230 | } |
1231 | 1231 | ||
1232 | static struct vm_struct *__get_vm_area_node(unsigned long size, | 1232 | static struct vm_struct *__get_vm_area_node(unsigned long size, |
1233 | unsigned long align, unsigned long flags, unsigned long start, | 1233 | unsigned long align, unsigned long flags, unsigned long start, |
1234 | unsigned long end, int node, gfp_t gfp_mask, void *caller) | 1234 | unsigned long end, int node, gfp_t gfp_mask, void *caller) |
1235 | { | 1235 | { |
1236 | struct vmap_area *va; | 1236 | struct vmap_area *va; |
1237 | struct vm_struct *area; | 1237 | struct vm_struct *area; |
1238 | 1238 | ||
1239 | BUG_ON(in_interrupt()); | 1239 | BUG_ON(in_interrupt()); |
1240 | if (flags & VM_IOREMAP) { | 1240 | if (flags & VM_IOREMAP) { |
1241 | int bit = fls(size); | 1241 | int bit = fls(size); |
1242 | 1242 | ||
1243 | if (bit > IOREMAP_MAX_ORDER) | 1243 | if (bit > IOREMAP_MAX_ORDER) |
1244 | bit = IOREMAP_MAX_ORDER; | 1244 | bit = IOREMAP_MAX_ORDER; |
1245 | else if (bit < PAGE_SHIFT) | 1245 | else if (bit < PAGE_SHIFT) |
1246 | bit = PAGE_SHIFT; | 1246 | bit = PAGE_SHIFT; |
1247 | 1247 | ||
1248 | align = 1ul << bit; | 1248 | align = 1ul << bit; |
1249 | } | 1249 | } |
1250 | 1250 | ||
1251 | size = PAGE_ALIGN(size); | 1251 | size = PAGE_ALIGN(size); |
1252 | if (unlikely(!size)) | 1252 | if (unlikely(!size)) |
1253 | return NULL; | 1253 | return NULL; |
1254 | 1254 | ||
1255 | area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); | 1255 | area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); |
1256 | if (unlikely(!area)) | 1256 | if (unlikely(!area)) |
1257 | return NULL; | 1257 | return NULL; |
1258 | 1258 | ||
1259 | /* | 1259 | /* |
1260 | * We always allocate a guard page. | 1260 | * We always allocate a guard page. |
1261 | */ | 1261 | */ |
1262 | size += PAGE_SIZE; | 1262 | size += PAGE_SIZE; |
1263 | 1263 | ||
1264 | va = alloc_vmap_area(size, align, start, end, node, gfp_mask); | 1264 | va = alloc_vmap_area(size, align, start, end, node, gfp_mask); |
1265 | if (IS_ERR(va)) { | 1265 | if (IS_ERR(va)) { |
1266 | kfree(area); | 1266 | kfree(area); |
1267 | return NULL; | 1267 | return NULL; |
1268 | } | 1268 | } |
1269 | 1269 | ||
1270 | insert_vmalloc_vm(area, va, flags, caller); | 1270 | insert_vmalloc_vm(area, va, flags, caller); |
1271 | return area; | 1271 | return area; |
1272 | } | 1272 | } |
1273 | 1273 | ||
1274 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | 1274 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, |
1275 | unsigned long start, unsigned long end) | 1275 | unsigned long start, unsigned long end) |
1276 | { | 1276 | { |
1277 | return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, | 1277 | return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, |
1278 | __builtin_return_address(0)); | 1278 | __builtin_return_address(0)); |
1279 | } | 1279 | } |
1280 | EXPORT_SYMBOL_GPL(__get_vm_area); | 1280 | EXPORT_SYMBOL_GPL(__get_vm_area); |
1281 | 1281 | ||
1282 | struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, | 1282 | struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, |
1283 | unsigned long start, unsigned long end, | 1283 | unsigned long start, unsigned long end, |
1284 | void *caller) | 1284 | void *caller) |
1285 | { | 1285 | { |
1286 | return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, | 1286 | return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL, |
1287 | caller); | 1287 | caller); |
1288 | } | 1288 | } |
1289 | 1289 | ||
1290 | /** | 1290 | /** |
1291 | * get_vm_area - reserve a contiguous kernel virtual area | 1291 | * get_vm_area - reserve a contiguous kernel virtual area |
1292 | * @size: size of the area | 1292 | * @size: size of the area |
1293 | * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC | 1293 | * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC |
1294 | * | 1294 | * |
1295 | * Search an area of @size in the kernel virtual mapping area, | 1295 | * Search an area of @size in the kernel virtual mapping area, |
1296 | * and reserve it for our purposes. Returns the area descriptor | 1296 | * and reserve it for our purposes. Returns the area descriptor |
1297 | * on success or %NULL on failure. | 1297 | * on success or %NULL on failure. |
1298 | */ | 1298 | */ |
1299 | struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) | 1299 | struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) |
1300 | { | 1300 | { |
1301 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, | 1301 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, |
1302 | -1, GFP_KERNEL, __builtin_return_address(0)); | 1302 | -1, GFP_KERNEL, __builtin_return_address(0)); |
1303 | } | 1303 | } |
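A hedged sketch combining get_vm_area() with map_vm_area() from above (editor's illustration; the VM_MAP flag choice and the error handling are assumptions, mirroring how vmap() uses these helpers):

static void *map_pages_into_vmalloc_space(struct page **pages, unsigned int nr)
{
	struct vm_struct *area;
	struct page **cursor = pages;	/* map_vm_area() advances this cursor */

	area = get_vm_area((unsigned long)nr << PAGE_SHIFT, VM_MAP);
	if (!area)
		return NULL;

	if (map_vm_area(area, PAGE_KERNEL, &cursor)) {
		vunmap(area->addr);	/* drops the reservation, frees no pages */
		return NULL;
	}
	return area->addr;
}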
1304 | 1304 | ||
1305 | struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, | 1305 | struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, |
1306 | void *caller) | 1306 | void *caller) |
1307 | { | 1307 | { |
1308 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, | 1308 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, |
1309 | -1, GFP_KERNEL, caller); | 1309 | -1, GFP_KERNEL, caller); |
1310 | } | 1310 | } |
1311 | 1311 | ||
1312 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, | 1312 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, |
1313 | int node, gfp_t gfp_mask) | 1313 | int node, gfp_t gfp_mask) |
1314 | { | 1314 | { |
1315 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, | 1315 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, |
1316 | node, gfp_mask, __builtin_return_address(0)); | 1316 | node, gfp_mask, __builtin_return_address(0)); |
1317 | } | 1317 | } |
1318 | 1318 | ||
1319 | static struct vm_struct *find_vm_area(const void *addr) | 1319 | static struct vm_struct *find_vm_area(const void *addr) |
1320 | { | 1320 | { |
1321 | struct vmap_area *va; | 1321 | struct vmap_area *va; |
1322 | 1322 | ||
1323 | va = find_vmap_area((unsigned long)addr); | 1323 | va = find_vmap_area((unsigned long)addr); |
1324 | if (va && va->flags & VM_VM_AREA) | 1324 | if (va && va->flags & VM_VM_AREA) |
1325 | return va->private; | 1325 | return va->private; |
1326 | 1326 | ||
1327 | return NULL; | 1327 | return NULL; |
1328 | } | 1328 | } |
1329 | 1329 | ||
1330 | /** | 1330 | /** |
1331 | * remove_vm_area - find and remove a contiguous kernel virtual area | 1331 | * remove_vm_area - find and remove a contiguous kernel virtual area |
1332 | * @addr: base address | 1332 | * @addr: base address |
1333 | * | 1333 | * |
1334 | * Search for the kernel VM area starting at @addr, and remove it. | 1334 | * Search for the kernel VM area starting at @addr, and remove it. |
1335 | * This function returns the found VM area, but using it is NOT safe | 1335 | * This function returns the found VM area, but using it is NOT safe |
1336 | * on SMP machines, except for its size or flags. | 1336 | * on SMP machines, except for its size or flags. |
1337 | */ | 1337 | */ |
1338 | struct vm_struct *remove_vm_area(const void *addr) | 1338 | struct vm_struct *remove_vm_area(const void *addr) |
1339 | { | 1339 | { |
1340 | struct vmap_area *va; | 1340 | struct vmap_area *va; |
1341 | 1341 | ||
1342 | va = find_vmap_area((unsigned long)addr); | 1342 | va = find_vmap_area((unsigned long)addr); |
1343 | if (va && va->flags & VM_VM_AREA) { | 1343 | if (va && va->flags & VM_VM_AREA) { |
1344 | struct vm_struct *vm = va->private; | 1344 | struct vm_struct *vm = va->private; |
1345 | struct vm_struct *tmp, **p; | 1345 | struct vm_struct *tmp, **p; |
1346 | /* | 1346 | /* |
1347 | * remove from list and disallow access to this vm_struct | 1347 | * remove from list and disallow access to this vm_struct |
1348 | * before unmap. (address range conflicts are still prevented | 1348 | * before unmap. (address range conflicts are still prevented |
1349 | * by vmap.) | 1349 | * by vmap.) |
1350 | */ | 1350 | */ |
1351 | write_lock(&vmlist_lock); | 1351 | write_lock(&vmlist_lock); |
1352 | for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) | 1352 | for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) |
1353 | ; | 1353 | ; |
1354 | *p = tmp->next; | 1354 | *p = tmp->next; |
1355 | write_unlock(&vmlist_lock); | 1355 | write_unlock(&vmlist_lock); |
1356 | 1356 | ||
1357 | vmap_debug_free_range(va->va_start, va->va_end); | 1357 | vmap_debug_free_range(va->va_start, va->va_end); |
1358 | free_unmap_vmap_area(va); | 1358 | free_unmap_vmap_area(va); |
1359 | vm->size -= PAGE_SIZE; | 1359 | vm->size -= PAGE_SIZE; |
1360 | 1360 | ||
1361 | return vm; | 1361 | return vm; |
1362 | } | 1362 | } |
1363 | return NULL; | 1363 | return NULL; |
1364 | } | 1364 | } |
1365 | 1365 | ||
1366 | static void __vunmap(const void *addr, int deallocate_pages) | 1366 | static void __vunmap(const void *addr, int deallocate_pages) |
1367 | { | 1367 | { |
1368 | struct vm_struct *area; | 1368 | struct vm_struct *area; |
1369 | 1369 | ||
1370 | if (!addr) | 1370 | if (!addr) |
1371 | return; | 1371 | return; |
1372 | 1372 | ||
1373 | if ((PAGE_SIZE-1) & (unsigned long)addr) { | 1373 | if ((PAGE_SIZE-1) & (unsigned long)addr) { |
1374 | WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr); | 1374 | WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr); |
1375 | return; | 1375 | return; |
1376 | } | 1376 | } |
1377 | 1377 | ||
1378 | area = remove_vm_area(addr); | 1378 | area = remove_vm_area(addr); |
1379 | if (unlikely(!area)) { | 1379 | if (unlikely(!area)) { |
1380 | WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", | 1380 | WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", |
1381 | addr); | 1381 | addr); |
1382 | return; | 1382 | return; |
1383 | } | 1383 | } |
1384 | 1384 | ||
1385 | debug_check_no_locks_freed(addr, area->size); | 1385 | debug_check_no_locks_freed(addr, area->size); |
1386 | debug_check_no_obj_freed(addr, area->size); | 1386 | debug_check_no_obj_freed(addr, area->size); |
1387 | 1387 | ||
1388 | if (deallocate_pages) { | 1388 | if (deallocate_pages) { |
1389 | int i; | 1389 | int i; |
1390 | 1390 | ||
1391 | for (i = 0; i < area->nr_pages; i++) { | 1391 | for (i = 0; i < area->nr_pages; i++) { |
1392 | struct page *page = area->pages[i]; | 1392 | struct page *page = area->pages[i]; |
1393 | 1393 | ||
1394 | BUG_ON(!page); | 1394 | BUG_ON(!page); |
1395 | __free_page(page); | 1395 | __free_page(page); |
1396 | } | 1396 | } |
1397 | 1397 | ||
1398 | if (area->flags & VM_VPAGES) | 1398 | if (area->flags & VM_VPAGES) |
1399 | vfree(area->pages); | 1399 | vfree(area->pages); |
1400 | else | 1400 | else |
1401 | kfree(area->pages); | 1401 | kfree(area->pages); |
1402 | } | 1402 | } |
1403 | 1403 | ||
1404 | kfree(area); | 1404 | kfree(area); |
1405 | return; | 1405 | return; |
1406 | } | 1406 | } |
1407 | 1407 | ||
1408 | /** | 1408 | /** |
1409 | * vfree - release memory allocated by vmalloc() | 1409 | * vfree - release memory allocated by vmalloc() |
1410 | * @addr: memory base address | 1410 | * @addr: memory base address |
1411 | * | 1411 | * |
1412 | * Free the virtually contiguous memory area starting at @addr, as | 1412 | * Free the virtually contiguous memory area starting at @addr, as |
1413 | * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is | 1413 | * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is |
1414 | * NULL, no operation is performed. | 1414 | * NULL, no operation is performed. |
1415 | * | 1415 | * |
1416 | * Must not be called in interrupt context. | 1416 | * Must not be called in interrupt context. |
1417 | */ | 1417 | */ |
1418 | void vfree(const void *addr) | 1418 | void vfree(const void *addr) |
1419 | { | 1419 | { |
1420 | BUG_ON(in_interrupt()); | 1420 | BUG_ON(in_interrupt()); |
1421 | 1421 | ||
1422 | kmemleak_free(addr); | 1422 | kmemleak_free(addr); |
1423 | 1423 | ||
1424 | __vunmap(addr, 1); | 1424 | __vunmap(addr, 1); |
1425 | } | 1425 | } |
1426 | EXPORT_SYMBOL(vfree); | 1426 | EXPORT_SYMBOL(vfree); |
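Editor's note: a minimal usage sketch (not part of this commit) showing the vmalloc()/vfree() pairing the comment above describes; the buffer size and the hypothetical my_load_table() caller are illustrative only.

#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/errno.h>

/* Hypothetical example: allocate a large, virtually contiguous scratch
 * buffer that does not need to be physically contiguous. */
static int my_load_table(unsigned long nbytes)
{
	void *buf;

	buf = vmalloc(nbytes);
	if (!buf)
		return -ENOMEM;

	memset(buf, 0, nbytes);	/* open-coded zeroing; see vzalloc() below */

	/* ... use buf from process context ... */

	vfree(buf);		/* must not be called from interrupt context */
	return 0;
}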
1427 | 1427 | ||
1428 | /** | 1428 | /** |
1429 | * vunmap - release virtual mapping obtained by vmap() | 1429 | * vunmap - release virtual mapping obtained by vmap() |
1430 | * @addr: memory base address | 1430 | * @addr: memory base address |
1431 | * | 1431 | * |
1432 | * Free the virtually contiguous memory area starting at @addr, | 1432 | * Free the virtually contiguous memory area starting at @addr, |
1433 | * which was created from the page array passed to vmap(). | 1433 | * which was created from the page array passed to vmap(). |
1434 | * | 1434 | * |
1435 | * Must not be called in interrupt context. | 1435 | * Must not be called in interrupt context. |
1436 | */ | 1436 | */ |
1437 | void vunmap(const void *addr) | 1437 | void vunmap(const void *addr) |
1438 | { | 1438 | { |
1439 | BUG_ON(in_interrupt()); | 1439 | BUG_ON(in_interrupt()); |
1440 | might_sleep(); | 1440 | might_sleep(); |
1441 | __vunmap(addr, 0); | 1441 | __vunmap(addr, 0); |
1442 | } | 1442 | } |
1443 | EXPORT_SYMBOL(vunmap); | 1443 | EXPORT_SYMBOL(vunmap); |
1444 | 1444 | ||
1445 | /** | 1445 | /** |
1446 | * vmap - map an array of pages into virtually contiguous space | 1446 | * vmap - map an array of pages into virtually contiguous space |
1447 | * @pages: array of page pointers | 1447 | * @pages: array of page pointers |
1448 | * @count: number of pages to map | 1448 | * @count: number of pages to map |
1449 | * @flags: vm_area->flags | 1449 | * @flags: vm_area->flags |
1450 | * @prot: page protection for the mapping | 1450 | * @prot: page protection for the mapping |
1451 | * | 1451 | * |
1452 | * Maps @count pages from @pages into contiguous kernel virtual | 1452 | * Maps @count pages from @pages into contiguous kernel virtual |
1453 | * space. | 1453 | * space. |
1454 | */ | 1454 | */ |
1455 | void *vmap(struct page **pages, unsigned int count, | 1455 | void *vmap(struct page **pages, unsigned int count, |
1456 | unsigned long flags, pgprot_t prot) | 1456 | unsigned long flags, pgprot_t prot) |
1457 | { | 1457 | { |
1458 | struct vm_struct *area; | 1458 | struct vm_struct *area; |
1459 | 1459 | ||
1460 | might_sleep(); | 1460 | might_sleep(); |
1461 | 1461 | ||
1462 | if (count > totalram_pages) | 1462 | if (count > totalram_pages) |
1463 | return NULL; | 1463 | return NULL; |
1464 | 1464 | ||
1465 | area = get_vm_area_caller((count << PAGE_SHIFT), flags, | 1465 | area = get_vm_area_caller((count << PAGE_SHIFT), flags, |
1466 | __builtin_return_address(0)); | 1466 | __builtin_return_address(0)); |
1467 | if (!area) | 1467 | if (!area) |
1468 | return NULL; | 1468 | return NULL; |
1469 | 1469 | ||
1470 | if (map_vm_area(area, prot, &pages)) { | 1470 | if (map_vm_area(area, prot, &pages)) { |
1471 | vunmap(area->addr); | 1471 | vunmap(area->addr); |
1472 | return NULL; | 1472 | return NULL; |
1473 | } | 1473 | } |
1474 | 1474 | ||
1475 | return area->addr; | 1475 | return area->addr; |
1476 | } | 1476 | } |
1477 | EXPORT_SYMBOL(vmap); | 1477 | EXPORT_SYMBOL(vmap); |
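Editor's note: a sketch, not from the diff, of the vmap()/vunmap() pairing documented above: individually allocated pages are stitched into one contiguous kernel virtual range. The my_alloc_mapped() helper and its error handling are invented for illustration.

#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>

/* Hypothetical: allocate nr pages one by one and map them contiguously.
 * The caller supplies the pages[] array and keeps ownership of the pages. */
static void *my_alloc_mapped(struct page **pages, unsigned int nr)
{
	void *addr;
	unsigned int i;

	for (i = 0; i < nr; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto out_free;
	}

	/* VM_MAP marks the area as vmap()ed pages; PAGE_KERNEL gives an
	 * ordinary cacheable kernel mapping. */
	addr = vmap(pages, nr, VM_MAP, PAGE_KERNEL);
	if (!addr)
		goto out_free;
	return addr;

out_free:
	while (i--)
		__free_page(pages[i]);
	return NULL;
}

Unlike vfree() on a vmalloc() allocation, vunmap() only tears down the mapping; the caller still owns the pages and must __free_page() them afterwards.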
1478 | 1478 | ||
1479 | static void *__vmalloc_node(unsigned long size, unsigned long align, | 1479 | static void *__vmalloc_node(unsigned long size, unsigned long align, |
1480 | gfp_t gfp_mask, pgprot_t prot, | 1480 | gfp_t gfp_mask, pgprot_t prot, |
1481 | int node, void *caller); | 1481 | int node, void *caller); |
1482 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | 1482 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, |
1483 | pgprot_t prot, int node, void *caller) | 1483 | pgprot_t prot, int node, void *caller) |
1484 | { | 1484 | { |
1485 | struct page **pages; | 1485 | struct page **pages; |
1486 | unsigned int nr_pages, array_size, i; | 1486 | unsigned int nr_pages, array_size, i; |
1487 | gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; | 1487 | gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; |
1488 | 1488 | ||
1489 | nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; | 1489 | nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; |
1490 | array_size = (nr_pages * sizeof(struct page *)); | 1490 | array_size = (nr_pages * sizeof(struct page *)); |
1491 | 1491 | ||
1492 | area->nr_pages = nr_pages; | 1492 | area->nr_pages = nr_pages; |
1493 | /* Please note that the recursion is strictly bounded. */ | 1493 | /* Please note that the recursion is strictly bounded. */ |
1494 | if (array_size > PAGE_SIZE) { | 1494 | if (array_size > PAGE_SIZE) { |
1495 | pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, | 1495 | pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, |
1496 | PAGE_KERNEL, node, caller); | 1496 | PAGE_KERNEL, node, caller); |
1497 | area->flags |= VM_VPAGES; | 1497 | area->flags |= VM_VPAGES; |
1498 | } else { | 1498 | } else { |
1499 | pages = kmalloc_node(array_size, nested_gfp, node); | 1499 | pages = kmalloc_node(array_size, nested_gfp, node); |
1500 | } | 1500 | } |
1501 | area->pages = pages; | 1501 | area->pages = pages; |
1502 | area->caller = caller; | 1502 | area->caller = caller; |
1503 | if (!area->pages) { | 1503 | if (!area->pages) { |
1504 | remove_vm_area(area->addr); | 1504 | remove_vm_area(area->addr); |
1505 | kfree(area); | 1505 | kfree(area); |
1506 | return NULL; | 1506 | return NULL; |
1507 | } | 1507 | } |
1508 | 1508 | ||
1509 | for (i = 0; i < area->nr_pages; i++) { | 1509 | for (i = 0; i < area->nr_pages; i++) { |
1510 | struct page *page; | 1510 | struct page *page; |
1511 | 1511 | ||
1512 | if (node < 0) | 1512 | if (node < 0) |
1513 | page = alloc_page(gfp_mask); | 1513 | page = alloc_page(gfp_mask); |
1514 | else | 1514 | else |
1515 | page = alloc_pages_node(node, gfp_mask, 0); | 1515 | page = alloc_pages_node(node, gfp_mask, 0); |
1516 | 1516 | ||
1517 | if (unlikely(!page)) { | 1517 | if (unlikely(!page)) { |
1518 | /* Successfully allocated i pages, free them in __vunmap() */ | 1518 | /* Successfully allocated i pages, free them in __vunmap() */ |
1519 | area->nr_pages = i; | 1519 | area->nr_pages = i; |
1520 | goto fail; | 1520 | goto fail; |
1521 | } | 1521 | } |
1522 | area->pages[i] = page; | 1522 | area->pages[i] = page; |
1523 | } | 1523 | } |
1524 | 1524 | ||
1525 | if (map_vm_area(area, prot, &pages)) | 1525 | if (map_vm_area(area, prot, &pages)) |
1526 | goto fail; | 1526 | goto fail; |
1527 | return area->addr; | 1527 | return area->addr; |
1528 | 1528 | ||
1529 | fail: | 1529 | fail: |
1530 | vfree(area->addr); | 1530 | vfree(area->addr); |
1531 | return NULL; | 1531 | return NULL; |
1532 | } | 1532 | } |
1533 | 1533 | ||
1534 | void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) | 1534 | void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) |
1535 | { | 1535 | { |
1536 | void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1, | 1536 | void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1, |
1537 | __builtin_return_address(0)); | 1537 | __builtin_return_address(0)); |
1538 | 1538 | ||
1539 | /* | 1539 | /* |
1540 | * A ref_count = 3 is needed because the vm_struct and vmap_area | 1540 | * A ref_count = 3 is needed because the vm_struct and vmap_area |
1541 | * structures allocated in the __get_vm_area_node() function contain | 1541 | * structures allocated in the __get_vm_area_node() function contain |
1542 | * references to the virtual address of the vmalloc'ed block. | 1542 | * references to the virtual address of the vmalloc'ed block. |
1543 | */ | 1543 | */ |
1544 | kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask); | 1544 | kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask); |
1545 | 1545 | ||
1546 | return addr; | 1546 | return addr; |
1547 | } | 1547 | } |
1548 | 1548 | ||
1549 | /** | 1549 | /** |
1550 | * __vmalloc_node - allocate virtually contiguous memory | 1550 | * __vmalloc_node - allocate virtually contiguous memory |
1551 | * @size: allocation size | 1551 | * @size: allocation size |
1552 | * @align: desired alignment | 1552 | * @align: desired alignment |
1553 | * @gfp_mask: flags for the page level allocator | 1553 | * @gfp_mask: flags for the page level allocator |
1554 | * @prot: protection mask for the allocated pages | 1554 | * @prot: protection mask for the allocated pages |
1555 | * @node: node to use for allocation or -1 | 1555 | * @node: node to use for allocation or -1 |
1556 | * @caller: caller's return address | 1556 | * @caller: caller's return address |
1557 | * | 1557 | * |
1558 | * Allocate enough pages to cover @size from the page level | 1558 | * Allocate enough pages to cover @size from the page level |
1559 | * allocator with @gfp_mask flags. Map them into contiguous | 1559 | * allocator with @gfp_mask flags. Map them into contiguous |
1560 | * kernel virtual space, using a pagetable protection of @prot. | 1560 | * kernel virtual space, using a pagetable protection of @prot. |
1561 | */ | 1561 | */ |
1562 | static void *__vmalloc_node(unsigned long size, unsigned long align, | 1562 | static void *__vmalloc_node(unsigned long size, unsigned long align, |
1563 | gfp_t gfp_mask, pgprot_t prot, | 1563 | gfp_t gfp_mask, pgprot_t prot, |
1564 | int node, void *caller) | 1564 | int node, void *caller) |
1565 | { | 1565 | { |
1566 | struct vm_struct *area; | 1566 | struct vm_struct *area; |
1567 | void *addr; | 1567 | void *addr; |
1568 | unsigned long real_size = size; | 1568 | unsigned long real_size = size; |
1569 | 1569 | ||
1570 | size = PAGE_ALIGN(size); | 1570 | size = PAGE_ALIGN(size); |
1571 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) | 1571 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) |
1572 | return NULL; | 1572 | return NULL; |
1573 | 1573 | ||
1574 | area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START, | 1574 | area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START, |
1575 | VMALLOC_END, node, gfp_mask, caller); | 1575 | VMALLOC_END, node, gfp_mask, caller); |
1576 | 1576 | ||
1577 | if (!area) | 1577 | if (!area) |
1578 | return NULL; | 1578 | return NULL; |
1579 | 1579 | ||
1580 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); | 1580 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); |
1581 | 1581 | ||
1582 | /* | 1582 | /* |
1583 | * A ref_count = 3 is needed because the vm_struct and vmap_area | 1583 | * A ref_count = 3 is needed because the vm_struct and vmap_area |
1584 | * structures allocated in the __get_vm_area_node() function contain | 1584 | * structures allocated in the __get_vm_area_node() function contain |
1585 | * references to the virtual address of the vmalloc'ed block. | 1585 | * references to the virtual address of the vmalloc'ed block. |
1586 | */ | 1586 | */ |
1587 | kmemleak_alloc(addr, real_size, 3, gfp_mask); | 1587 | kmemleak_alloc(addr, real_size, 3, gfp_mask); |
1588 | 1588 | ||
1589 | return addr; | 1589 | return addr; |
1590 | } | 1590 | } |
1591 | 1591 | ||
1592 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | 1592 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) |
1593 | { | 1593 | { |
1594 | return __vmalloc_node(size, 1, gfp_mask, prot, -1, | 1594 | return __vmalloc_node(size, 1, gfp_mask, prot, -1, |
1595 | __builtin_return_address(0)); | 1595 | __builtin_return_address(0)); |
1596 | } | 1596 | } |
1597 | EXPORT_SYMBOL(__vmalloc); | 1597 | EXPORT_SYMBOL(__vmalloc); |
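Editor's note: an illustrative sketch of the tighter control over gfp and protection flags that __vmalloc() gives, which the vmalloc() comment below points at. The GFP_NOFS choice models a hypothetical filesystem-context caller and is not something this commit prescribes.

#include <linux/vmalloc.h>
#include <linux/gfp.h>

/* Hypothetical: allocate on a path where recursing into filesystem
 * reclaim could deadlock, so GFP_NOFS is used instead of GFP_KERNEL. */
static void *my_fs_buffer(unsigned long size)
{
	return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
}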
1598 | 1598 | ||
1599 | static inline void *__vmalloc_node_flags(unsigned long size, | ||
1600 | int node, gfp_t flags) | ||
1601 | { | ||
1602 | return __vmalloc_node(size, 1, flags, PAGE_KERNEL, | ||
1603 | node, __builtin_return_address(0)); | ||
1604 | } | ||
1605 | |||
1599 | /** | 1606 | /** |
1600 | * vmalloc - allocate virtually contiguous memory | 1607 | * vmalloc - allocate virtually contiguous memory |
1601 | * @size: allocation size | 1608 | * @size: allocation size |
1602 | * Allocate enough pages to cover @size from the page level | 1609 | * Allocate enough pages to cover @size from the page level |
1603 | * allocator and map them into contiguous kernel virtual space. | 1610 | * allocator and map them into contiguous kernel virtual space. |
1604 | * | 1611 | * |
1605 | * For tight control over page level allocator and protection flags | 1612 | * For tight control over page level allocator and protection flags |
1606 | * use __vmalloc() instead. | 1613 | * use __vmalloc() instead. |
1607 | */ | 1614 | */ |
1608 | void *vmalloc(unsigned long size) | 1615 | void *vmalloc(unsigned long size) |
1609 | { | 1616 | { |
1610 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, | 1617 | return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM); |
1611 | -1, __builtin_return_address(0)); | ||
1612 | } | 1618 | } |
1613 | EXPORT_SYMBOL(vmalloc); | 1619 | EXPORT_SYMBOL(vmalloc); |
1614 | 1620 | ||
1615 | /** | 1621 | /** |
1622 | * vzalloc - allocate virtually contiguous memory with zero fill | ||
1623 | * @size: allocation size | ||
1624 | * Allocate enough pages to cover @size from the page level | ||
1625 | * allocator and map them into contiguous kernel virtual space. | ||
1626 | * The memory allocated is set to zero. | ||
1627 | * | ||
1628 | * For tight control over page level allocator and protection flags | ||
1629 | * use __vmalloc() instead. | ||
1630 | */ | ||
1631 | void *vzalloc(unsigned long size) | ||
1632 | { | ||
1633 | return __vmalloc_node_flags(size, -1, | ||
1634 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); | ||
1635 | } | ||
1636 | EXPORT_SYMBOL(vzalloc); | ||
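Editor's note: the before/after sketch below is not part of the diff; it illustrates the open-coded vmalloc()-then-memset() pattern that the new vzalloc() helper replaces with a __GFP_ZERO allocation. The my_ctx structure is hypothetical.

#include <linux/vmalloc.h>
#include <linux/string.h>

struct my_ctx {			/* hypothetical, large enough to want vmalloc */
	int state;
	char data[1 << 20];
};

/* Before this commit, callers zeroed the area by hand: */
static struct my_ctx *my_ctx_alloc_old(void)
{
	struct my_ctx *ctx = vmalloc(sizeof(*ctx));

	if (ctx)
		memset(ctx, 0, sizeof(*ctx));
	return ctx;
}

/* With vzalloc(), the page allocator zeroes the pages via __GFP_ZERO: */
static struct my_ctx *my_ctx_alloc_new(void)
{
	return vzalloc(sizeof(struct my_ctx));
}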
1637 | |||
1638 | /** | ||
1616 | * vmalloc_user - allocate zeroed virtually contiguous memory for userspace | 1639 | * vmalloc_user - allocate zeroed virtually contiguous memory for userspace |
1617 | * @size: allocation size | 1640 | * @size: allocation size |
1618 | * | 1641 | * |
1619 | * The resulting memory area is zeroed so it can be mapped to userspace | 1642 | * The resulting memory area is zeroed so it can be mapped to userspace |
1620 | * without leaking data. | 1643 | * without leaking data. |
1621 | */ | 1644 | */ |
1622 | void *vmalloc_user(unsigned long size) | 1645 | void *vmalloc_user(unsigned long size) |
1623 | { | 1646 | { |
1624 | struct vm_struct *area; | 1647 | struct vm_struct *area; |
1625 | void *ret; | 1648 | void *ret; |
1626 | 1649 | ||
1627 | ret = __vmalloc_node(size, SHMLBA, | 1650 | ret = __vmalloc_node(size, SHMLBA, |
1628 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, | 1651 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, |
1629 | PAGE_KERNEL, -1, __builtin_return_address(0)); | 1652 | PAGE_KERNEL, -1, __builtin_return_address(0)); |
1630 | if (ret) { | 1653 | if (ret) { |
1631 | area = find_vm_area(ret); | 1654 | area = find_vm_area(ret); |
1632 | area->flags |= VM_USERMAP; | 1655 | area->flags |= VM_USERMAP; |
1633 | } | 1656 | } |
1634 | return ret; | 1657 | return ret; |
1635 | } | 1658 | } |
1636 | EXPORT_SYMBOL(vmalloc_user); | 1659 | EXPORT_SYMBOL(vmalloc_user); |
1637 | 1660 | ||
1638 | /** | 1661 | /** |
1639 | * vmalloc_node - allocate memory on a specific node | 1662 | * vmalloc_node - allocate memory on a specific node |
1640 | * @size: allocation size | 1663 | * @size: allocation size |
1641 | * @node: numa node | 1664 | * @node: numa node |
1642 | * | 1665 | * |
1643 | * Allocate enough pages to cover @size from the page level | 1666 | * Allocate enough pages to cover @size from the page level |
1644 | * allocator and map them into contiguous kernel virtual space. | 1667 | * allocator and map them into contiguous kernel virtual space. |
1645 | * | 1668 | * |
1646 | * For tight control over page level allocator and protection flags | 1669 | * For tight control over page level allocator and protection flags |
1647 | * use __vmalloc() instead. | 1670 | * use __vmalloc() instead. |
1648 | */ | 1671 | */ |
1649 | void *vmalloc_node(unsigned long size, int node) | 1672 | void *vmalloc_node(unsigned long size, int node) |
1650 | { | 1673 | { |
1651 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, | 1674 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, |
1652 | node, __builtin_return_address(0)); | 1675 | node, __builtin_return_address(0)); |
1653 | } | 1676 | } |
1654 | EXPORT_SYMBOL(vmalloc_node); | 1677 | EXPORT_SYMBOL(vmalloc_node); |
1678 | |||
1679 | /** | ||
1680 | * vzalloc_node - allocate memory on a specific node with zero fill | ||
1681 | * @size: allocation size | ||
1682 | * @node: numa node | ||
1683 | * | ||
1684 | * Allocate enough pages to cover @size from the page level | ||
1685 | * allocator and map them into contiguous kernel virtual space. | ||
1686 | * The memory allocated is set to zero. | ||
1687 | * | ||
1688 | * For tight control over page level allocator and protection flags | ||
1689 | * use __vmalloc_node() instead. | ||
1690 | */ | ||
1691 | void *vzalloc_node(unsigned long size, int node) | ||
1692 | { | ||
1693 | return __vmalloc_node_flags(size, node, | ||
1694 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); | ||
1695 | } | ||
1696 | EXPORT_SYMBOL(vzalloc_node); | ||
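Editor's note: a sketch, not from the commit, of node-aware allocation with the new vzalloc_node() helper; dev_to_node() is a real helper for struct device, but the surrounding driver code is invented.

#include <linux/vmalloc.h>
#include <linux/device.h>

/* Hypothetical: place a zeroed ring buffer on the NUMA node closest to
 * the device that will fill it; passing -1 would mean "no preference". */
static void *my_alloc_ring(struct device *dev, unsigned long size)
{
	return vzalloc_node(size, dev_to_node(dev));
}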
1655 | 1697 | ||
1656 | #ifndef PAGE_KERNEL_EXEC | 1698 | #ifndef PAGE_KERNEL_EXEC |
1657 | # define PAGE_KERNEL_EXEC PAGE_KERNEL | 1699 | # define PAGE_KERNEL_EXEC PAGE_KERNEL |
1658 | #endif | 1700 | #endif |
1659 | 1701 | ||
1660 | /** | 1702 | /** |
1661 | * vmalloc_exec - allocate virtually contiguous, executable memory | 1703 | * vmalloc_exec - allocate virtually contiguous, executable memory |
1662 | * @size: allocation size | 1704 | * @size: allocation size |
1663 | * | 1705 | * |
1664 | * Kernel-internal function to allocate enough pages to cover @size | 1706 | * Kernel-internal function to allocate enough pages to cover @size |
1665 | * from the page level allocator and map them into contiguous and | 1707 | * from the page level allocator and map them into contiguous and |
1666 | * executable kernel virtual space. | 1708 | * executable kernel virtual space. |
1667 | * | 1709 | * |
1668 | * For tight control over page level allocator and protection flags | 1710 | * For tight control over page level allocator and protection flags |
1669 | * use __vmalloc() instead. | 1711 | * use __vmalloc() instead. |
1670 | */ | 1712 | */ |
1671 | 1713 | ||
1672 | void *vmalloc_exec(unsigned long size) | 1714 | void *vmalloc_exec(unsigned long size) |
1673 | { | 1715 | { |
1674 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, | 1716 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, |
1675 | -1, __builtin_return_address(0)); | 1717 | -1, __builtin_return_address(0)); |
1676 | } | 1718 | } |
1677 | 1719 | ||
1678 | #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) | 1720 | #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) |
1679 | #define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL | 1721 | #define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL |
1680 | #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) | 1722 | #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) |
1681 | #define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL | 1723 | #define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL |
1682 | #else | 1724 | #else |
1683 | #define GFP_VMALLOC32 GFP_KERNEL | 1725 | #define GFP_VMALLOC32 GFP_KERNEL |
1684 | #endif | 1726 | #endif |
1685 | 1727 | ||
1686 | /** | 1728 | /** |
1687 | * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) | 1729 | * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) |
1688 | * @size: allocation size | 1730 | * @size: allocation size |
1689 | * | 1731 | * |
1690 | * Allocate enough 32bit PA addressable pages to cover @size from the | 1732 | * Allocate enough 32bit PA addressable pages to cover @size from the |
1691 | * page level allocator and map them into contiguous kernel virtual space. | 1733 | * page level allocator and map them into contiguous kernel virtual space. |
1692 | */ | 1734 | */ |
1693 | void *vmalloc_32(unsigned long size) | 1735 | void *vmalloc_32(unsigned long size) |
1694 | { | 1736 | { |
1695 | return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, | 1737 | return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, |
1696 | -1, __builtin_return_address(0)); | 1738 | -1, __builtin_return_address(0)); |
1697 | } | 1739 | } |
1698 | EXPORT_SYMBOL(vmalloc_32); | 1740 | EXPORT_SYMBOL(vmalloc_32); |
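Editor's note: illustrative only. vmalloc_32() constrains the backing pages to 32-bit addressable memory via GFP_VMALLOC32 above; a device limited to 32-bit physical addresses might use it for a large staging buffer that is copied out page by page. The result is still only virtually contiguous, so it is not a DMA buffer by itself.

#include <linux/vmalloc.h>

/* Hypothetical staging buffer whose backing pages all sit below 4GB. */
static void *my_alloc_staging(unsigned long size)
{
	return vmalloc_32(size);
}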
1699 | 1741 | ||
1700 | /** | 1742 | /** |
1701 | * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory | 1743 | * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory |
1702 | * @size: allocation size | 1744 | * @size: allocation size |
1703 | * | 1745 | * |
1704 | * The resulting memory area is 32bit addressable and zeroed so it can be | 1746 | * The resulting memory area is 32bit addressable and zeroed so it can be |
1705 | * mapped to userspace without leaking data. | 1747 | * mapped to userspace without leaking data. |
1706 | */ | 1748 | */ |
1707 | void *vmalloc_32_user(unsigned long size) | 1749 | void *vmalloc_32_user(unsigned long size) |
1708 | { | 1750 | { |
1709 | struct vm_struct *area; | 1751 | struct vm_struct *area; |
1710 | void *ret; | 1752 | void *ret; |
1711 | 1753 | ||
1712 | ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, | 1754 | ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, |
1713 | -1, __builtin_return_address(0)); | 1755 | -1, __builtin_return_address(0)); |
1714 | if (ret) { | 1756 | if (ret) { |
1715 | area = find_vm_area(ret); | 1757 | area = find_vm_area(ret); |
1716 | area->flags |= VM_USERMAP; | 1758 | area->flags |= VM_USERMAP; |
1717 | } | 1759 | } |
1718 | return ret; | 1760 | return ret; |
1719 | } | 1761 | } |
1720 | EXPORT_SYMBOL(vmalloc_32_user); | 1762 | EXPORT_SYMBOL(vmalloc_32_user); |
1721 | 1763 | ||
1722 | /* | 1764 | /* |
1723 | * small helper routine, copy contents to buf from addr. | 1765 | * small helper routine, copy contents to buf from addr. |
1724 | * If the page is not present, fill zero. | 1766 | * If the page is not present, fill zero. |
1725 | */ | 1767 | */ |
1726 | 1768 | ||
1727 | static int aligned_vread(char *buf, char *addr, unsigned long count) | 1769 | static int aligned_vread(char *buf, char *addr, unsigned long count) |
1728 | { | 1770 | { |
1729 | struct page *p; | 1771 | struct page *p; |
1730 | int copied = 0; | 1772 | int copied = 0; |
1731 | 1773 | ||
1732 | while (count) { | 1774 | while (count) { |
1733 | unsigned long offset, length; | 1775 | unsigned long offset, length; |
1734 | 1776 | ||
1735 | offset = (unsigned long)addr & ~PAGE_MASK; | 1777 | offset = (unsigned long)addr & ~PAGE_MASK; |
1736 | length = PAGE_SIZE - offset; | 1778 | length = PAGE_SIZE - offset; |
1737 | if (length > count) | 1779 | if (length > count) |
1738 | length = count; | 1780 | length = count; |
1739 | p = vmalloc_to_page(addr); | 1781 | p = vmalloc_to_page(addr); |
1740 | /* | 1782 | /* |
1741 | * To do safe access to this _mapped_ area, we need | 1783 | * To do safe access to this _mapped_ area, we need |
1742 | * lock. But adding lock here means that we need to add | 1784 | * lock. But adding lock here means that we need to add |
1743 | * overhead of vmalloc()/vfree() calls for this _debug_ | 1785 | * overhead of vmalloc()/vfree() calls for this _debug_ |
1744 | * interface, rarely used. Instead of that, we'll use | 1786 | * interface, rarely used. Instead of that, we'll use |
1745 | * kmap() and get small overhead in this access function. | 1787 | * kmap() and get small overhead in this access function. |
1746 | */ | 1788 | */ |
1747 | if (p) { | 1789 | if (p) { |
1748 | /* | 1790 | /* |
1749 | * we can expect USER0 is not used (see vread/vwrite's | 1791 | * we can expect USER0 is not used (see vread/vwrite's |
1750 | * function description) | 1792 | * function description) |
1751 | */ | 1793 | */ |
1752 | void *map = kmap_atomic(p, KM_USER0); | 1794 | void *map = kmap_atomic(p, KM_USER0); |
1753 | memcpy(buf, map + offset, length); | 1795 | memcpy(buf, map + offset, length); |
1754 | kunmap_atomic(map, KM_USER0); | 1796 | kunmap_atomic(map, KM_USER0); |
1755 | } else | 1797 | } else |
1756 | memset(buf, 0, length); | 1798 | memset(buf, 0, length); |
1757 | 1799 | ||
1758 | addr += length; | 1800 | addr += length; |
1759 | buf += length; | 1801 | buf += length; |
1760 | copied += length; | 1802 | copied += length; |
1761 | count -= length; | 1803 | count -= length; |
1762 | } | 1804 | } |
1763 | return copied; | 1805 | return copied; |
1764 | } | 1806 | } |
1765 | 1807 | ||
1766 | static int aligned_vwrite(char *buf, char *addr, unsigned long count) | 1808 | static int aligned_vwrite(char *buf, char *addr, unsigned long count) |
1767 | { | 1809 | { |
1768 | struct page *p; | 1810 | struct page *p; |
1769 | int copied = 0; | 1811 | int copied = 0; |
1770 | 1812 | ||
1771 | while (count) { | 1813 | while (count) { |
1772 | unsigned long offset, length; | 1814 | unsigned long offset, length; |
1773 | 1815 | ||
1774 | offset = (unsigned long)addr & ~PAGE_MASK; | 1816 | offset = (unsigned long)addr & ~PAGE_MASK; |
1775 | length = PAGE_SIZE - offset; | 1817 | length = PAGE_SIZE - offset; |
1776 | if (length > count) | 1818 | if (length > count) |
1777 | length = count; | 1819 | length = count; |
1778 | p = vmalloc_to_page(addr); | 1820 | p = vmalloc_to_page(addr); |
1779 | /* | 1821 | /* |
1780 | * To do safe access to this _mapped_ area, we need | 1822 | * To do safe access to this _mapped_ area, we need |
1781 | * lock. But adding lock here means that we need to add | 1823 | * lock. But adding lock here means that we need to add |
1782 | * overhead of vmalloc()/vfree() calls for this _debug_ | 1824 | * overhead of vmalloc()/vfree() calls for this _debug_ |
1783 | * interface, rarely used. Instead of that, we'll use | 1825 | * interface, rarely used. Instead of that, we'll use |
1784 | * kmap() and get small overhead in this access function. | 1826 | * kmap() and get small overhead in this access function. |
1785 | */ | 1827 | */ |
1786 | if (p) { | 1828 | if (p) { |
1787 | /* | 1829 | /* |
1788 | * we can expect USER0 is not used (see vread/vwrite's | 1830 | * we can expect USER0 is not used (see vread/vwrite's |
1789 | * function description) | 1831 | * function description) |
1790 | */ | 1832 | */ |
1791 | void *map = kmap_atomic(p, KM_USER0); | 1833 | void *map = kmap_atomic(p, KM_USER0); |
1792 | memcpy(map + offset, buf, length); | 1834 | memcpy(map + offset, buf, length); |
1793 | kunmap_atomic(map, KM_USER0); | 1835 | kunmap_atomic(map, KM_USER0); |
1794 | } | 1836 | } |
1795 | addr += length; | 1837 | addr += length; |
1796 | buf += length; | 1838 | buf += length; |
1797 | copied += length; | 1839 | copied += length; |
1798 | count -= length; | 1840 | count -= length; |
1799 | } | 1841 | } |
1800 | return copied; | 1842 | return copied; |
1801 | } | 1843 | } |
1802 | 1844 | ||
1803 | /** | 1845 | /** |
1804 | * vread() - read vmalloc area in a safe way. | 1846 | * vread() - read vmalloc area in a safe way. |
1805 | * @buf: buffer for reading data | 1847 | * @buf: buffer for reading data |
1806 | * @addr: vm address. | 1848 | * @addr: vm address. |
1807 | * @count: number of bytes to be read. | 1849 | * @count: number of bytes to be read. |
1808 | * | 1850 | * |
1809 | * Returns # of bytes by which addr and buf should be increased | 1851 | * Returns # of bytes by which addr and buf should be increased |
1810 | * (same number as @count). Returns 0 if [addr...addr+count) doesn't | 1852 | * (same number as @count). Returns 0 if [addr...addr+count) doesn't |
1811 | * include any intersection with a live vmalloc area. | 1853 | * include any intersection with a live vmalloc area. |
1812 | * | 1854 | * |
1813 | * This function checks that addr is a valid vmalloc'ed area, and | 1855 | * This function checks that addr is a valid vmalloc'ed area, and |
1814 | * copy data from that area to a given buffer. If the given memory range | 1856 | * copy data from that area to a given buffer. If the given memory range |
1815 | * of [addr...addr+count) includes some valid address, data is copied to | 1857 | * of [addr...addr+count) includes some valid address, data is copied to |
1816 | * proper area of @buf. If there are memory holes, they'll be zero-filled. | 1858 | * proper area of @buf. If there are memory holes, they'll be zero-filled. |
1817 | * IOREMAP area is treated as memory hole and no copy is done. | 1859 | * IOREMAP area is treated as memory hole and no copy is done. |
1818 | * | 1860 | * |
1819 | * If [addr...addr+count) doesn't include any intersection with a live | 1861 | * If [addr...addr+count) doesn't include any intersection with a live |
1820 | * vm_struct area, returns 0. | 1862 | * vm_struct area, returns 0. |
1821 | * @buf should be kernel's buffer. Because this function uses KM_USER0, | 1863 | * @buf should be kernel's buffer. Because this function uses KM_USER0, |
1822 | * the caller should guarantee KM_USER0 is not used. | 1864 | * the caller should guarantee KM_USER0 is not used. |
1823 | * | 1865 | * |
1824 | * Note: In usual ops, vread() is never necessary because the caller | 1866 | * Note: In usual ops, vread() is never necessary because the caller |
1825 | * should know vmalloc() area is valid and can use memcpy(). | 1867 | * should know vmalloc() area is valid and can use memcpy(). |
1826 | * This is for routines which have to access vmalloc area without | 1868 | * This is for routines which have to access vmalloc area without |
1827 | * any information, such as /dev/kmem. | 1869 | * any information, such as /dev/kmem. |
1828 | * | 1870 | * |
1829 | */ | 1871 | */ |
1830 | 1872 | ||
1831 | long vread(char *buf, char *addr, unsigned long count) | 1873 | long vread(char *buf, char *addr, unsigned long count) |
1832 | { | 1874 | { |
1833 | struct vm_struct *tmp; | 1875 | struct vm_struct *tmp; |
1834 | char *vaddr, *buf_start = buf; | 1876 | char *vaddr, *buf_start = buf; |
1835 | unsigned long buflen = count; | 1877 | unsigned long buflen = count; |
1836 | unsigned long n; | 1878 | unsigned long n; |
1837 | 1879 | ||
1838 | /* Don't allow overflow */ | 1880 | /* Don't allow overflow */ |
1839 | if ((unsigned long) addr + count < count) | 1881 | if ((unsigned long) addr + count < count) |
1840 | count = -(unsigned long) addr; | 1882 | count = -(unsigned long) addr; |
1841 | 1883 | ||
1842 | read_lock(&vmlist_lock); | 1884 | read_lock(&vmlist_lock); |
1843 | for (tmp = vmlist; count && tmp; tmp = tmp->next) { | 1885 | for (tmp = vmlist; count && tmp; tmp = tmp->next) { |
1844 | vaddr = (char *) tmp->addr; | 1886 | vaddr = (char *) tmp->addr; |
1845 | if (addr >= vaddr + tmp->size - PAGE_SIZE) | 1887 | if (addr >= vaddr + tmp->size - PAGE_SIZE) |
1846 | continue; | 1888 | continue; |
1847 | while (addr < vaddr) { | 1889 | while (addr < vaddr) { |
1848 | if (count == 0) | 1890 | if (count == 0) |
1849 | goto finished; | 1891 | goto finished; |
1850 | *buf = '\0'; | 1892 | *buf = '\0'; |
1851 | buf++; | 1893 | buf++; |
1852 | addr++; | 1894 | addr++; |
1853 | count--; | 1895 | count--; |
1854 | } | 1896 | } |
1855 | n = vaddr + tmp->size - PAGE_SIZE - addr; | 1897 | n = vaddr + tmp->size - PAGE_SIZE - addr; |
1856 | if (n > count) | 1898 | if (n > count) |
1857 | n = count; | 1899 | n = count; |
1858 | if (!(tmp->flags & VM_IOREMAP)) | 1900 | if (!(tmp->flags & VM_IOREMAP)) |
1859 | aligned_vread(buf, addr, n); | 1901 | aligned_vread(buf, addr, n); |
1860 | else /* IOREMAP area is treated as memory hole */ | 1902 | else /* IOREMAP area is treated as memory hole */ |
1861 | memset(buf, 0, n); | 1903 | memset(buf, 0, n); |
1862 | buf += n; | 1904 | buf += n; |
1863 | addr += n; | 1905 | addr += n; |
1864 | count -= n; | 1906 | count -= n; |
1865 | } | 1907 | } |
1866 | finished: | 1908 | finished: |
1867 | read_unlock(&vmlist_lock); | 1909 | read_unlock(&vmlist_lock); |
1868 | 1910 | ||
1869 | if (buf == buf_start) | 1911 | if (buf == buf_start) |
1870 | return 0; | 1912 | return 0; |
1871 | /* zero-fill memory holes */ | 1913 | /* zero-fill memory holes */ |
1872 | if (buf != buf_start + buflen) | 1914 | if (buf != buf_start + buflen) |
1873 | memset(buf, 0, buflen - (buf - buf_start)); | 1915 | memset(buf, 0, buflen - (buf - buf_start)); |
1874 | 1916 | ||
1875 | return buflen; | 1917 | return buflen; |
1876 | } | 1918 | } |
1877 | 1919 | ||
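Editor's note: a sketch (not in the diff) of the kind of debug-only caller vread() is intended for, loosely modeled on the /dev/kmem style of access the comment mentions; the helper name and buffer handling are hypothetical.

#include <linux/vmalloc.h>
#include <linux/slab.h>

/* Hypothetical debug helper: snapshot 'count' bytes of vmalloc space
 * starting at 'addr' into a kernel buffer.  Holes read back as zeroes. */
static char *my_vmalloc_snapshot(char *addr, unsigned long count)
{
	char *buf = kzalloc(count, GFP_KERNEL);

	if (!buf)
		return NULL;

	if (!vread(buf, addr, count)) {
		/* no live vmalloc area intersects [addr, addr + count) */
		kfree(buf);
		return NULL;
	}
	return buf;
}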
1878 | /** | 1920 | /** |
1879 | * vwrite() - write vmalloc area in a safe way. | 1921 | * vwrite() - write vmalloc area in a safe way. |
1880 | * @buf: buffer for source data | 1922 | * @buf: buffer for source data |
1881 | * @addr: vm address. | 1923 | * @addr: vm address. |
1882 | * @count: number of bytes to be read. | 1924 | * @count: number of bytes to be read. |
1883 | * | 1925 | * |
1884 | * Returns # of bytes by which addr and buf should be increased | 1926 | * Returns # of bytes by which addr and buf should be increased |
1885 | * (same number as @count). | 1927 | * (same number as @count). |
1886 | * If [addr...addr+count) doesn't include any intersection with a valid | 1928 | * If [addr...addr+count) doesn't include any intersection with a valid |
1887 | * vmalloc area, returns 0. | 1929 | * vmalloc area, returns 0. |
1888 | * | 1930 | * |
1889 | * This function checks that addr is a valid vmalloc'ed area, and | 1931 | * This function checks that addr is a valid vmalloc'ed area, and |
1890 | * copy data from a buffer to the given addr. If specified range of | 1932 | * copy data from a buffer to the given addr. If specified range of |
1891 | * [addr...addr+count) includes some valid address, data is copied from | 1933 | * [addr...addr+count) includes some valid address, data is copied from |
1892 | * proper area of @buf. If there are memory holes, no copy to hole. | 1934 | * proper area of @buf. If there are memory holes, no copy to hole. |
1893 | * IOREMAP area is treated as memory hole and no copy is done. | 1935 | * IOREMAP area is treated as memory hole and no copy is done. |
1894 | * | 1936 | * |
1895 | * If [addr...addr+count) doesn't include any intersection with a live | 1937 | * If [addr...addr+count) doesn't include any intersection with a live |
1896 | * vm_struct area, returns 0. | 1938 | * vm_struct area, returns 0. |
1897 | * @buf should be kernel's buffer. Because this function uses KM_USER0, | 1939 | * @buf should be kernel's buffer. Because this function uses KM_USER0, |
1898 | * the caller should guarantee KM_USER0 is not used. | 1940 | * the caller should guarantee KM_USER0 is not used. |
1899 | * | 1941 | * |
1900 | * Note: In usual ops, vwrite() is never necessary because the caller | 1942 | * Note: In usual ops, vwrite() is never necessary because the caller |
1901 | * should know vmalloc() area is valid and can use memcpy(). | 1943 | * should know vmalloc() area is valid and can use memcpy(). |
1902 | * This is for routines which have to access vmalloc area without | 1944 | * This is for routines which have to access vmalloc area without |
1903 | * any information, such as /dev/kmem. | 1945 | * any information, such as /dev/kmem. |
1904 | * | 1946 | * |
1905 | * The caller should guarantee KM_USER1 is not used. | 1947 | * The caller should guarantee KM_USER1 is not used. |
1906 | */ | 1948 | */ |
1907 | 1949 | ||
1908 | long vwrite(char *buf, char *addr, unsigned long count) | 1950 | long vwrite(char *buf, char *addr, unsigned long count) |
1909 | { | 1951 | { |
1910 | struct vm_struct *tmp; | 1952 | struct vm_struct *tmp; |
1911 | char *vaddr; | 1953 | char *vaddr; |
1912 | unsigned long n, buflen; | 1954 | unsigned long n, buflen; |
1913 | int copied = 0; | 1955 | int copied = 0; |
1914 | 1956 | ||
1915 | /* Don't allow overflow */ | 1957 | /* Don't allow overflow */ |
1916 | if ((unsigned long) addr + count < count) | 1958 | if ((unsigned long) addr + count < count) |
1917 | count = -(unsigned long) addr; | 1959 | count = -(unsigned long) addr; |
1918 | buflen = count; | 1960 | buflen = count; |
1919 | 1961 | ||
1920 | read_lock(&vmlist_lock); | 1962 | read_lock(&vmlist_lock); |
1921 | for (tmp = vmlist; count && tmp; tmp = tmp->next) { | 1963 | for (tmp = vmlist; count && tmp; tmp = tmp->next) { |
1922 | vaddr = (char *) tmp->addr; | 1964 | vaddr = (char *) tmp->addr; |
1923 | if (addr >= vaddr + tmp->size - PAGE_SIZE) | 1965 | if (addr >= vaddr + tmp->size - PAGE_SIZE) |
1924 | continue; | 1966 | continue; |
1925 | while (addr < vaddr) { | 1967 | while (addr < vaddr) { |
1926 | if (count == 0) | 1968 | if (count == 0) |
1927 | goto finished; | 1969 | goto finished; |
1928 | buf++; | 1970 | buf++; |
1929 | addr++; | 1971 | addr++; |
1930 | count--; | 1972 | count--; |
1931 | } | 1973 | } |
1932 | n = vaddr + tmp->size - PAGE_SIZE - addr; | 1974 | n = vaddr + tmp->size - PAGE_SIZE - addr; |
1933 | if (n > count) | 1975 | if (n > count) |
1934 | n = count; | 1976 | n = count; |
1935 | if (!(tmp->flags & VM_IOREMAP)) { | 1977 | if (!(tmp->flags & VM_IOREMAP)) { |
1936 | aligned_vwrite(buf, addr, n); | 1978 | aligned_vwrite(buf, addr, n); |
1937 | copied++; | 1979 | copied++; |
1938 | } | 1980 | } |
1939 | buf += n; | 1981 | buf += n; |
1940 | addr += n; | 1982 | addr += n; |
1941 | count -= n; | 1983 | count -= n; |
1942 | } | 1984 | } |
1943 | finished: | 1985 | finished: |
1944 | read_unlock(&vmlist_lock); | 1986 | read_unlock(&vmlist_lock); |
1945 | if (!copied) | 1987 | if (!copied) |
1946 | return 0; | 1988 | return 0; |
1947 | return buflen; | 1989 | return buflen; |
1948 | } | 1990 | } |
1949 | 1991 | ||
1950 | /** | 1992 | /** |
1951 | * remap_vmalloc_range - map vmalloc pages to userspace | 1993 | * remap_vmalloc_range - map vmalloc pages to userspace |
1952 | * @vma: vma to cover (map full range of vma) | 1994 | * @vma: vma to cover (map full range of vma) |
1953 | * @addr: vmalloc memory | 1995 | * @addr: vmalloc memory |
1954 | * @pgoff: number of pages into addr before first page to map | 1996 | * @pgoff: number of pages into addr before first page to map |
1955 | * | 1997 | * |
1956 | * Returns: 0 for success, -Exxx on failure | 1998 | * Returns: 0 for success, -Exxx on failure |
1957 | * | 1999 | * |
1958 | * This function checks that addr is a valid vmalloc'ed area, and | 2000 | * This function checks that addr is a valid vmalloc'ed area, and |
1959 | * that it is big enough to cover the vma. Will return failure if | 2001 | * that it is big enough to cover the vma. Will return failure if |
1960 | * that criterion isn't met. | 2002 | * that criterion isn't met. |
1961 | * | 2003 | * |
1962 | * Similar to remap_pfn_range() (see mm/memory.c) | 2004 | * Similar to remap_pfn_range() (see mm/memory.c) |
1963 | */ | 2005 | */ |
1964 | int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, | 2006 | int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, |
1965 | unsigned long pgoff) | 2007 | unsigned long pgoff) |
1966 | { | 2008 | { |
1967 | struct vm_struct *area; | 2009 | struct vm_struct *area; |
1968 | unsigned long uaddr = vma->vm_start; | 2010 | unsigned long uaddr = vma->vm_start; |
1969 | unsigned long usize = vma->vm_end - vma->vm_start; | 2011 | unsigned long usize = vma->vm_end - vma->vm_start; |
1970 | 2012 | ||
1971 | if ((PAGE_SIZE-1) & (unsigned long)addr) | 2013 | if ((PAGE_SIZE-1) & (unsigned long)addr) |
1972 | return -EINVAL; | 2014 | return -EINVAL; |
1973 | 2015 | ||
1974 | area = find_vm_area(addr); | 2016 | area = find_vm_area(addr); |
1975 | if (!area) | 2017 | if (!area) |
1976 | return -EINVAL; | 2018 | return -EINVAL; |
1977 | 2019 | ||
1978 | if (!(area->flags & VM_USERMAP)) | 2020 | if (!(area->flags & VM_USERMAP)) |
1979 | return -EINVAL; | 2021 | return -EINVAL; |
1980 | 2022 | ||
1981 | if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) | 2023 | if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) |
1982 | return -EINVAL; | 2024 | return -EINVAL; |
1983 | 2025 | ||
1984 | addr += pgoff << PAGE_SHIFT; | 2026 | addr += pgoff << PAGE_SHIFT; |
1985 | do { | 2027 | do { |
1986 | struct page *page = vmalloc_to_page(addr); | 2028 | struct page *page = vmalloc_to_page(addr); |
1987 | int ret; | 2029 | int ret; |
1988 | 2030 | ||
1989 | ret = vm_insert_page(vma, uaddr, page); | 2031 | ret = vm_insert_page(vma, uaddr, page); |
1990 | if (ret) | 2032 | if (ret) |
1991 | return ret; | 2033 | return ret; |
1992 | 2034 | ||
1993 | uaddr += PAGE_SIZE; | 2035 | uaddr += PAGE_SIZE; |
1994 | addr += PAGE_SIZE; | 2036 | addr += PAGE_SIZE; |
1995 | usize -= PAGE_SIZE; | 2037 | usize -= PAGE_SIZE; |
1996 | } while (usize > 0); | 2038 | } while (usize > 0); |
1997 | 2039 | ||
1998 | /* Prevent "things" like memory migration? VM_flags need a cleanup... */ | 2040 | /* Prevent "things" like memory migration? VM_flags need a cleanup... */ |
1999 | vma->vm_flags |= VM_RESERVED; | 2041 | vma->vm_flags |= VM_RESERVED; |
2000 | 2042 | ||
2001 | return 0; | 2043 | return 0; |
2002 | } | 2044 | } |
2003 | EXPORT_SYMBOL(remap_vmalloc_range); | 2045 | EXPORT_SYMBOL(remap_vmalloc_range); |
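Editor's note: a sketch, not part of this commit, of the typical pairing of vmalloc_user() with remap_vmalloc_range() in a driver's mmap handler; struct my_dev and my_mmap() are hypothetical, and the zero pgoff assumes the whole buffer is mapped from its start.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

struct my_dev {			/* hypothetical driver state */
	void *shared;		/* allocated with vmalloc_user() */
};

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *dev = file->private_data;

	/* vmalloc_user() zeroed the area and set VM_USERMAP, so it is safe
	 * to expose; remap_vmalloc_range() checks that the vma fits. */
	return remap_vmalloc_range(vma, dev->shared, 0);
}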
2004 | 2046 | ||
2005 | /* | 2047 | /* |
2006 | * Implement a stub for vmalloc_sync_all() if the architecture chose not to | 2048 | * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
2007 | * have one. | 2049 | * have one. |
2008 | */ | 2050 | */ |
2009 | void __attribute__((weak)) vmalloc_sync_all(void) | 2051 | void __attribute__((weak)) vmalloc_sync_all(void) |
2010 | { | 2052 | { |
2011 | } | 2053 | } |
2012 | 2054 | ||
2013 | 2055 | ||
2014 | static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) | 2056 | static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) |
2015 | { | 2057 | { |
2016 | /* apply_to_page_range() does all the hard work. */ | 2058 | /* apply_to_page_range() does all the hard work. */ |
2017 | return 0; | 2059 | return 0; |
2018 | } | 2060 | } |
2019 | 2061 | ||
2020 | /** | 2062 | /** |
2021 | * alloc_vm_area - allocate a range of kernel address space | 2063 | * alloc_vm_area - allocate a range of kernel address space |
2022 | * @size: size of the area | 2064 | * @size: size of the area |
2023 | * | 2065 | * |
2024 | * Returns: NULL on failure, vm_struct on success | 2066 | * Returns: NULL on failure, vm_struct on success |
2025 | * | 2067 | * |
2026 | * This function reserves a range of kernel address space, and | 2068 | * This function reserves a range of kernel address space, and |
2027 | * allocates pagetables to map that range. No actual mappings | 2069 | * allocates pagetables to map that range. No actual mappings |
2028 | * are created. If the kernel address space is not shared | 2070 | * are created. If the kernel address space is not shared |
2029 | * between processes, it syncs the pagetable across all | 2071 | * between processes, it syncs the pagetable across all |
2030 | * processes. | 2072 | * processes. |
2031 | */ | 2073 | */ |
2032 | struct vm_struct *alloc_vm_area(size_t size) | 2074 | struct vm_struct *alloc_vm_area(size_t size) |
2033 | { | 2075 | { |
2034 | struct vm_struct *area; | 2076 | struct vm_struct *area; |
2035 | 2077 | ||
2036 | area = get_vm_area_caller(size, VM_IOREMAP, | 2078 | area = get_vm_area_caller(size, VM_IOREMAP, |
2037 | __builtin_return_address(0)); | 2079 | __builtin_return_address(0)); |
2038 | if (area == NULL) | 2080 | if (area == NULL) |
2039 | return NULL; | 2081 | return NULL; |
2040 | 2082 | ||
2041 | /* | 2083 | /* |
2042 | * This ensures that page tables are constructed for this region | 2084 | * This ensures that page tables are constructed for this region |
2043 | * of kernel virtual address space and mapped into init_mm. | 2085 | * of kernel virtual address space and mapped into init_mm. |
2044 | */ | 2086 | */ |
2045 | if (apply_to_page_range(&init_mm, (unsigned long)area->addr, | 2087 | if (apply_to_page_range(&init_mm, (unsigned long)area->addr, |
2046 | area->size, f, NULL)) { | 2088 | area->size, f, NULL)) { |
2047 | free_vm_area(area); | 2089 | free_vm_area(area); |
2048 | return NULL; | 2090 | return NULL; |
2049 | } | 2091 | } |
2050 | 2092 | ||
2051 | /* Make sure the pagetables are constructed in process kernel | 2093 | /* Make sure the pagetables are constructed in process kernel |
2052 | mappings */ | 2094 | mappings */ |
2053 | vmalloc_sync_all(); | 2095 | vmalloc_sync_all(); |
2054 | 2096 | ||
2055 | return area; | 2097 | return area; |
2056 | } | 2098 | } |
2057 | EXPORT_SYMBOL_GPL(alloc_vm_area); | 2099 | EXPORT_SYMBOL_GPL(alloc_vm_area); |
2058 | 2100 | ||
2059 | void free_vm_area(struct vm_struct *area) | 2101 | void free_vm_area(struct vm_struct *area) |
2060 | { | 2102 | { |
2061 | struct vm_struct *ret; | 2103 | struct vm_struct *ret; |
2062 | ret = remove_vm_area(area->addr); | 2104 | ret = remove_vm_area(area->addr); |
2063 | BUG_ON(ret != area); | 2105 | BUG_ON(ret != area); |
2064 | kfree(area); | 2106 | kfree(area); |
2065 | } | 2107 | } |
2066 | EXPORT_SYMBOL_GPL(free_vm_area); | 2108 | EXPORT_SYMBOL_GPL(free_vm_area); |
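Editor's note: an illustrative sketch of the alloc_vm_area()/free_vm_area() pairing exported above, in the spirit of its paravirtualized users, which populate the reserved range themselves (for example with foreign pages via a hypercall); the helpers below are invented.

#include <linux/vmalloc.h>

/* Hypothetical: reserve kernel address space whose page tables exist
 * but are empty, to be filled in later by the caller. */
static struct vm_struct *my_reserve(size_t size)
{
	struct vm_struct *area = alloc_vm_area(size);

	if (!area)
		return NULL;

	/* ... point the PTEs covering area->addr at the real pages ... */
	return area;
}

static void my_release(struct vm_struct *area)
{
	/* ... tear down the PTEs first ... */
	free_vm_area(area);
}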
2067 | 2109 | ||
2068 | #ifdef CONFIG_SMP | 2110 | #ifdef CONFIG_SMP |
2069 | static struct vmap_area *node_to_va(struct rb_node *n) | 2111 | static struct vmap_area *node_to_va(struct rb_node *n) |
2070 | { | 2112 | { |
2071 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; | 2113 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; |
2072 | } | 2114 | } |
2073 | 2115 | ||
2074 | /** | 2116 | /** |
2075 | * pvm_find_next_prev - find the next and prev vmap_area surrounding @end | 2117 | * pvm_find_next_prev - find the next and prev vmap_area surrounding @end |
2076 | * @end: target address | 2118 | * @end: target address |
2077 | * @pnext: out arg for the next vmap_area | 2119 | * @pnext: out arg for the next vmap_area |
2078 | * @pprev: out arg for the previous vmap_area | 2120 | * @pprev: out arg for the previous vmap_area |
2079 | * | 2121 | * |
2080 | * Returns: %true if either or both of next and prev are found, | 2122 | * Returns: %true if either or both of next and prev are found, |
2081 | * %false if no vmap_area exists | 2123 | * %false if no vmap_area exists |
2082 | * | 2124 | * |
2083 | * Find the vmap_areas whose end addresses enclose @end, i.e. if not | 2125 | * Find the vmap_areas whose end addresses enclose @end, i.e. if not |
2084 | * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. | 2126 | * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. |
2085 | */ | 2127 | */ |
2086 | static bool pvm_find_next_prev(unsigned long end, | 2128 | static bool pvm_find_next_prev(unsigned long end, |
2087 | struct vmap_area **pnext, | 2129 | struct vmap_area **pnext, |
2088 | struct vmap_area **pprev) | 2130 | struct vmap_area **pprev) |
2089 | { | 2131 | { |
2090 | struct rb_node *n = vmap_area_root.rb_node; | 2132 | struct rb_node *n = vmap_area_root.rb_node; |
2091 | struct vmap_area *va = NULL; | 2133 | struct vmap_area *va = NULL; |
2092 | 2134 | ||
2093 | while (n) { | 2135 | while (n) { |
2094 | va = rb_entry(n, struct vmap_area, rb_node); | 2136 | va = rb_entry(n, struct vmap_area, rb_node); |
2095 | if (end < va->va_end) | 2137 | if (end < va->va_end) |
2096 | n = n->rb_left; | 2138 | n = n->rb_left; |
2097 | else if (end > va->va_end) | 2139 | else if (end > va->va_end) |
2098 | n = n->rb_right; | 2140 | n = n->rb_right; |
2099 | else | 2141 | else |
2100 | break; | 2142 | break; |
2101 | } | 2143 | } |
2102 | 2144 | ||
2103 | if (!va) | 2145 | if (!va) |
2104 | return false; | 2146 | return false; |
2105 | 2147 | ||
2106 | if (va->va_end > end) { | 2148 | if (va->va_end > end) { |
2107 | *pnext = va; | 2149 | *pnext = va; |
2108 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); | 2150 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); |
2109 | } else { | 2151 | } else { |
2110 | *pprev = va; | 2152 | *pprev = va; |
2111 | *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); | 2153 | *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); |
2112 | } | 2154 | } |
2113 | return true; | 2155 | return true; |
2114 | } | 2156 | } |
2115 | 2157 | ||
2116 | /** | 2158 | /** |
2117 | * pvm_determine_end - find the highest aligned address between two vmap_areas | 2159 | * pvm_determine_end - find the highest aligned address between two vmap_areas |
2118 | * @pnext: in/out arg for the next vmap_area | 2160 | * @pnext: in/out arg for the next vmap_area |
2119 | * @pprev: in/out arg for the previous vmap_area | 2161 | * @pprev: in/out arg for the previous vmap_area |
2120 | * @align: alignment | 2162 | * @align: alignment |
2121 | * | 2163 | * |
2122 | * Returns: determined end address | 2164 | * Returns: determined end address |
2123 | * | 2165 | * |
2124 | * Find the highest aligned address between *@pnext and *@pprev below | 2166 | * Find the highest aligned address between *@pnext and *@pprev below |
2125 | * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned | 2167 | * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned |
2126 | * down address is between the end addresses of the two vmap_areas. | 2168 | * down address is between the end addresses of the two vmap_areas. |
2127 | * | 2169 | * |
2128 | * Please note that the address returned by this function may fall | 2170 | * Please note that the address returned by this function may fall |
2129 | * inside *@pnext vmap_area. The caller is responsible for checking | 2171 | * inside *@pnext vmap_area. The caller is responsible for checking |
2130 | * that. | 2172 | * that. |
2131 | */ | 2173 | */ |
2132 | static unsigned long pvm_determine_end(struct vmap_area **pnext, | 2174 | static unsigned long pvm_determine_end(struct vmap_area **pnext, |
2133 | struct vmap_area **pprev, | 2175 | struct vmap_area **pprev, |
2134 | unsigned long align) | 2176 | unsigned long align) |
2135 | { | 2177 | { |
2136 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | 2178 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); |
2137 | unsigned long addr; | 2179 | unsigned long addr; |
2138 | 2180 | ||
2139 | if (*pnext) | 2181 | if (*pnext) |
2140 | addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); | 2182 | addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); |
2141 | else | 2183 | else |
2142 | addr = vmalloc_end; | 2184 | addr = vmalloc_end; |
2143 | 2185 | ||
2144 | while (*pprev && (*pprev)->va_end > addr) { | 2186 | while (*pprev && (*pprev)->va_end > addr) { |
2145 | *pnext = *pprev; | 2187 | *pnext = *pprev; |
2146 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); | 2188 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); |
2147 | } | 2189 | } |
2148 | 2190 | ||
2149 | return addr; | 2191 | return addr; |
2150 | } | 2192 | } |
2151 | 2193 | ||
2152 | /** | 2194 | /** |
2153 | * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator | 2195 | * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator |
2154 | * @offsets: array containing offset of each area | 2196 | * @offsets: array containing offset of each area |
2155 | * @sizes: array containing size of each area | 2197 | * @sizes: array containing size of each area |
2156 | * @nr_vms: the number of areas to allocate | 2198 | * @nr_vms: the number of areas to allocate |
2157 | * @align: alignment, all entries in @offsets and @sizes must be aligned to this | 2199 | * @align: alignment, all entries in @offsets and @sizes must be aligned to this |
2158 | * @gfp_mask: allocation mask | 2200 | * @gfp_mask: allocation mask |
2159 | * | 2201 | * |
2160 | * Returns: kmalloc'd vm_struct pointer array pointing to allocated | 2202 | * Returns: kmalloc'd vm_struct pointer array pointing to allocated |
2161 | * vm_structs on success, %NULL on failure | 2203 | * vm_structs on success, %NULL on failure |
2162 | * | 2204 | * |
2163 | * Percpu allocator wants to use congruent vm areas so that it can | 2205 | * Percpu allocator wants to use congruent vm areas so that it can |
2164 | * maintain the offsets among percpu areas. This function allocates | 2206 | * maintain the offsets among percpu areas. This function allocates |
2165 | * congruent vmalloc areas for it. These areas tend to be scattered | 2207 | * congruent vmalloc areas for it. These areas tend to be scattered |
2166 | * pretty far, distance between two areas easily going up to | 2208 | * pretty far, distance between two areas easily going up to |
2167 | * gigabytes. To avoid interacting with regular vmallocs, these areas | 2209 | * gigabytes. To avoid interacting with regular vmallocs, these areas |
2168 | * are allocated from top. | 2210 | * are allocated from top. |
2169 | * | 2211 | * |
2170 | * Despite its complicated look, this allocator is rather simple. It | 2212 | * Despite its complicated look, this allocator is rather simple. It |
2171 | * does everything top-down and scans areas from the end looking for | 2213 | * does everything top-down and scans areas from the end looking for |
2172 | * matching slot. While scanning, if any of the areas overlaps with | 2214 | * matching slot. While scanning, if any of the areas overlaps with |
2173 | * existing vmap_area, the base address is pulled down to fit the | 2215 | * existing vmap_area, the base address is pulled down to fit the |
2174 | * area. Scanning is repeated till all the areas fit and then all | 2216 | * area. Scanning is repeated till all the areas fit and then all |
2175 | * necessary data structures are inserted and the result is returned. | 2217 | * necessary data structures are inserted and the result is returned. |
2176 | */ | 2218 | */ |
2177 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | 2219 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, |
2178 | const size_t *sizes, int nr_vms, | 2220 | const size_t *sizes, int nr_vms, |
2179 | size_t align, gfp_t gfp_mask) | 2221 | size_t align, gfp_t gfp_mask) |
2180 | { | 2222 | { |
2181 | const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); | 2223 | const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); |
2182 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | 2224 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); |
2183 | struct vmap_area **vas, *prev, *next; | 2225 | struct vmap_area **vas, *prev, *next; |
2184 | struct vm_struct **vms; | 2226 | struct vm_struct **vms; |
2185 | int area, area2, last_area, term_area; | 2227 | int area, area2, last_area, term_area; |
2186 | unsigned long base, start, end, last_end; | 2228 | unsigned long base, start, end, last_end; |
2187 | bool purged = false; | 2229 | bool purged = false; |
2188 | 2230 | ||
2189 | gfp_mask &= GFP_RECLAIM_MASK; | 2231 | gfp_mask &= GFP_RECLAIM_MASK; |
2190 | 2232 | ||
2191 | /* verify parameters and allocate data structures */ | 2233 | /* verify parameters and allocate data structures */ |
2192 | BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); | 2234 | BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); |
2193 | for (last_area = 0, area = 0; area < nr_vms; area++) { | 2235 | for (last_area = 0, area = 0; area < nr_vms; area++) { |
2194 | start = offsets[area]; | 2236 | start = offsets[area]; |
2195 | end = start + sizes[area]; | 2237 | end = start + sizes[area]; |
2196 | 2238 | ||
2197 | /* is everything aligned properly? */ | 2239 | /* is everything aligned properly? */ |
2198 | BUG_ON(!IS_ALIGNED(offsets[area], align)); | 2240 | BUG_ON(!IS_ALIGNED(offsets[area], align)); |
2199 | BUG_ON(!IS_ALIGNED(sizes[area], align)); | 2241 | BUG_ON(!IS_ALIGNED(sizes[area], align)); |
2200 | 2242 | ||
2201 | /* detect the area with the highest address */ | 2243 | /* detect the area with the highest address */ |
2202 | if (start > offsets[last_area]) | 2244 | if (start > offsets[last_area]) |
2203 | last_area = area; | 2245 | last_area = area; |
2204 | 2246 | ||
2205 | for (area2 = 0; area2 < nr_vms; area2++) { | 2247 | for (area2 = 0; area2 < nr_vms; area2++) { |
2206 | unsigned long start2 = offsets[area2]; | 2248 | unsigned long start2 = offsets[area2]; |
2207 | unsigned long end2 = start2 + sizes[area2]; | 2249 | unsigned long end2 = start2 + sizes[area2]; |
2208 | 2250 | ||
2209 | if (area2 == area) | 2251 | if (area2 == area) |
2210 | continue; | 2252 | continue; |
2211 | 2253 | ||
2212 | BUG_ON(start2 >= start && start2 < end); | 2254 | BUG_ON(start2 >= start && start2 < end); |
2213 | BUG_ON(end2 <= end && end2 > start); | 2255 | BUG_ON(end2 <= end && end2 > start); |
2214 | } | 2256 | } |
2215 | } | 2257 | } |
2216 | last_end = offsets[last_area] + sizes[last_area]; | 2258 | last_end = offsets[last_area] + sizes[last_area]; |
2217 | 2259 | ||
2218 | if (vmalloc_end - vmalloc_start < last_end) { | 2260 | if (vmalloc_end - vmalloc_start < last_end) { |
2219 | WARN_ON(true); | 2261 | WARN_ON(true); |
2220 | return NULL; | 2262 | return NULL; |
2221 | } | 2263 | } |
2222 | 2264 | ||
2223 | vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); | 2265 | vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); |
2224 | vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); | 2266 | vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); |
2225 | if (!vas || !vms) | 2267 | if (!vas || !vms) |
2226 | goto err_free; | 2268 | goto err_free; |
2227 | 2269 | ||
2228 | for (area = 0; area < nr_vms; area++) { | 2270 | for (area = 0; area < nr_vms; area++) { |
2229 | vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); | 2271 | vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); |
2230 | vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); | 2272 | vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); |
2231 | if (!vas[area] || !vms[area]) | 2273 | if (!vas[area] || !vms[area]) |
2232 | goto err_free; | 2274 | goto err_free; |
2233 | } | 2275 | } |
2234 | retry: | 2276 | retry: |
2235 | spin_lock(&vmap_area_lock); | 2277 | spin_lock(&vmap_area_lock); |
2236 | 2278 | ||
2237 | /* start scanning - we scan from the top, begin with the last area */ | 2279 | /* start scanning - we scan from the top, begin with the last area */ |
2238 | area = term_area = last_area; | 2280 | area = term_area = last_area; |
2239 | start = offsets[area]; | 2281 | start = offsets[area]; |
2240 | end = start + sizes[area]; | 2282 | end = start + sizes[area]; |
2241 | 2283 | ||
2242 | if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { | 2284 | if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { |
2243 | base = vmalloc_end - last_end; | 2285 | base = vmalloc_end - last_end; |
2244 | goto found; | 2286 | goto found; |
2245 | } | 2287 | } |
2246 | base = pvm_determine_end(&next, &prev, align) - end; | 2288 | base = pvm_determine_end(&next, &prev, align) - end; |
2247 | 2289 | ||
2248 | while (true) { | 2290 | while (true) { |
2249 | BUG_ON(next && next->va_end <= base + end); | 2291 | BUG_ON(next && next->va_end <= base + end); |
2250 | BUG_ON(prev && prev->va_end > base + end); | 2292 | BUG_ON(prev && prev->va_end > base + end); |
2251 | 2293 | ||
2252 | /* | 2294 | /* |
2253 | * base might have underflowed, add last_end before | 2295 | * base might have underflowed, add last_end before |
2254 | * comparing. | 2296 | * comparing. |
2255 | */ | 2297 | */ |
2256 | if (base + last_end < vmalloc_start + last_end) { | 2298 | if (base + last_end < vmalloc_start + last_end) { |
2257 | spin_unlock(&vmap_area_lock); | 2299 | spin_unlock(&vmap_area_lock); |
2258 | if (!purged) { | 2300 | if (!purged) { |
2259 | purge_vmap_area_lazy(); | 2301 | purge_vmap_area_lazy(); |
2260 | purged = true; | 2302 | purged = true; |
2261 | goto retry; | 2303 | goto retry; |
2262 | } | 2304 | } |
2263 | goto err_free; | 2305 | goto err_free; |
2264 | } | 2306 | } |
2265 | 2307 | ||
2266 | /* | 2308 | /* |
2267 | * If next overlaps, move base downwards so that it's | 2309 | * If next overlaps, move base downwards so that it's |
2268 | * right below next and then recheck. | 2310 | * right below next and then recheck. |
2269 | */ | 2311 | */ |
2270 | if (next && next->va_start < base + end) { | 2312 | if (next && next->va_start < base + end) { |
2271 | base = pvm_determine_end(&next, &prev, align) - end; | 2313 | base = pvm_determine_end(&next, &prev, align) - end; |
2272 | term_area = area; | 2314 | term_area = area; |
2273 | continue; | 2315 | continue; |
2274 | } | 2316 | } |
2275 | 2317 | ||
2276 | /* | 2318 | /* |
2277 | * If prev overlaps, shift down next and prev and move | 2319 | * If prev overlaps, shift down next and prev and move |
2278 | * base so that it's right below new next and then | 2320 | * base so that it's right below new next and then |
2279 | * recheck. | 2321 | * recheck. |
2280 | */ | 2322 | */ |
2281 | if (prev && prev->va_end > base + start) { | 2323 | if (prev && prev->va_end > base + start) { |
2282 | next = prev; | 2324 | next = prev; |
2283 | prev = node_to_va(rb_prev(&next->rb_node)); | 2325 | prev = node_to_va(rb_prev(&next->rb_node)); |
2284 | base = pvm_determine_end(&next, &prev, align) - end; | 2326 | base = pvm_determine_end(&next, &prev, align) - end; |
2285 | term_area = area; | 2327 | term_area = area; |
2286 | continue; | 2328 | continue; |
2287 | } | 2329 | } |
2288 | 2330 | ||
2289 | /* | 2331 | /* |
2290 | * This area fits, move on to the previous one. If | 2332 | * This area fits, move on to the previous one. If |
2291 | * the previous one is the terminal one, we're done. | 2333 | * the previous one is the terminal one, we're done. |
2292 | */ | 2334 | */ |
2293 | area = (area + nr_vms - 1) % nr_vms; | 2335 | area = (area + nr_vms - 1) % nr_vms; |
2294 | if (area == term_area) | 2336 | if (area == term_area) |
2295 | break; | 2337 | break; |
2296 | start = offsets[area]; | 2338 | start = offsets[area]; |
2297 | end = start + sizes[area]; | 2339 | end = start + sizes[area]; |
2298 | pvm_find_next_prev(base + end, &next, &prev); | 2340 | pvm_find_next_prev(base + end, &next, &prev); |
2299 | } | 2341 | } |
2300 | found: | 2342 | found: |
2301 | /* we've found a fitting base, insert all va's */ | 2343 | /* we've found a fitting base, insert all va's */ |
2302 | for (area = 0; area < nr_vms; area++) { | 2344 | for (area = 0; area < nr_vms; area++) { |
2303 | struct vmap_area *va = vas[area]; | 2345 | struct vmap_area *va = vas[area]; |
2304 | 2346 | ||
2305 | va->va_start = base + offsets[area]; | 2347 | va->va_start = base + offsets[area]; |
2306 | va->va_end = va->va_start + sizes[area]; | 2348 | va->va_end = va->va_start + sizes[area]; |
2307 | __insert_vmap_area(va); | 2349 | __insert_vmap_area(va); |
2308 | } | 2350 | } |
2309 | 2351 | ||
2310 | vmap_area_pcpu_hole = base + offsets[last_area]; | 2352 | vmap_area_pcpu_hole = base + offsets[last_area]; |
2311 | 2353 | ||
2312 | spin_unlock(&vmap_area_lock); | 2354 | spin_unlock(&vmap_area_lock); |
2313 | 2355 | ||
2314 | /* insert all vm's */ | 2356 | /* insert all vm's */ |
2315 | for (area = 0; area < nr_vms; area++) | 2357 | for (area = 0; area < nr_vms; area++) |
2316 | insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, | 2358 | insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, |
2317 | pcpu_get_vm_areas); | 2359 | pcpu_get_vm_areas); |
2318 | 2360 | ||
2319 | kfree(vas); | 2361 | kfree(vas); |
2320 | return vms; | 2362 | return vms; |
2321 | 2363 | ||
2322 | err_free: | 2364 | err_free: |
2323 | for (area = 0; area < nr_vms; area++) { | 2365 | for (area = 0; area < nr_vms; area++) { |
2324 | if (vas) | 2366 | if (vas) |
2325 | kfree(vas[area]); | 2367 | kfree(vas[area]); |
2326 | if (vms) | 2368 | if (vms) |
2327 | kfree(vms[area]); | 2369 | kfree(vms[area]); |
2328 | } | 2370 | } |
2329 | kfree(vas); | 2371 | kfree(vas); |
2330 | kfree(vms); | 2372 | kfree(vms); |
2331 | return NULL; | 2373 | return NULL; |
2332 | } | 2374 | } |
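To make the scan above concrete (hypothetical numbers, not from the source): with nr_vms = 2, offsets = {0, 8MB}, sizes = {4MB, 4MB} and align = 4MB, last_end is 12MB and the first candidate base is vmalloc_end - 12MB. The two slots are then [base, base + 4MB) and [base + 8MB, base + 12MB); whenever a slot collides with an existing vmap_area, pvm_determine_end() pulls base further down, and the loop only exits once a complete pass over both areas finds no overlap, or base would drop below vmalloc_start, in which case the lazy purge is run once and the whole scan is retried.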
2333 | 2375 | ||
2334 | /** | 2376 | /** |
2335 | * pcpu_free_vm_areas - free vmalloc areas for percpu allocator | 2377 | * pcpu_free_vm_areas - free vmalloc areas for percpu allocator |
2336 | * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() | 2378 | * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() |
2337 | * @nr_vms: the number of allocated areas | 2379 | * @nr_vms: the number of allocated areas |
2338 | * | 2380 | * |
2339 | * Free vm_structs and the array allocated by pcpu_get_vm_areas(). | 2381 | * Free vm_structs and the array allocated by pcpu_get_vm_areas(). |
2340 | */ | 2382 | */ |
2341 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) | 2383 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) |
2342 | { | 2384 | { |
2343 | int i; | 2385 | int i; |
2344 | 2386 | ||
2345 | for (i = 0; i < nr_vms; i++) | 2387 | for (i = 0; i < nr_vms; i++) |
2346 | free_vm_area(vms[i]); | 2388 | free_vm_area(vms[i]); |
2347 | kfree(vms); | 2389 | kfree(vms); |
2348 | } | 2390 | } |
2349 | #endif /* CONFIG_SMP */ | 2391 | #endif /* CONFIG_SMP */ |
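For context, a minimal caller sketch (hypothetical, not part of this commit; the function name and sizes are assumptions) showing how the two interfaces pair up. Note that pcpu_free_vm_areas() releases both the individual areas and the vms array returned by pcpu_get_vm_areas(), so the caller must not kfree() it again:

	/*
	 * Hypothetical caller sketch: reserve two congruent page-sized areas
	 * spaced two pages apart, then release them.  The real user of this
	 * interface is the percpu allocator.
	 */
	#include <linux/vmalloc.h>
	#include <linux/gfp.h>

	static int __init congruent_areas_demo(void)
	{
		const unsigned long offsets[] = { 0, 2 * PAGE_SIZE };
		const size_t sizes[]          = { PAGE_SIZE, PAGE_SIZE };
		struct vm_struct **vms;

		vms = pcpu_get_vm_areas(offsets, sizes, 2, PAGE_SIZE, GFP_KERNEL);
		if (!vms)
			return -ENOMEM;

		/* The areas keep their relative offset:               */
		/*   vms[1]->addr - vms[0]->addr == 2 * PAGE_SIZE      */

		pcpu_free_vm_areas(vms, 2);	/* frees the areas and the vms array */
		return 0;
	}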
2350 | 2392 | ||
2351 | #ifdef CONFIG_PROC_FS | 2393 | #ifdef CONFIG_PROC_FS |
2352 | static void *s_start(struct seq_file *m, loff_t *pos) | 2394 | static void *s_start(struct seq_file *m, loff_t *pos) |
2353 | __acquires(&vmlist_lock) | 2395 | __acquires(&vmlist_lock) |
2354 | { | 2396 | { |
2355 | loff_t n = *pos; | 2397 | loff_t n = *pos; |
2356 | struct vm_struct *v; | 2398 | struct vm_struct *v; |
2357 | 2399 | ||
2358 | read_lock(&vmlist_lock); | 2400 | read_lock(&vmlist_lock); |
2359 | v = vmlist; | 2401 | v = vmlist; |
2360 | while (n > 0 && v) { | 2402 | while (n > 0 && v) { |
2361 | n--; | 2403 | n--; |
2362 | v = v->next; | 2404 | v = v->next; |
2363 | } | 2405 | } |
2364 | if (!n) | 2406 | if (!n) |
2365 | return v; | 2407 | return v; |
2366 | 2408 | ||
2367 | return NULL; | 2409 | return NULL; |
2368 | 2410 | ||
2369 | } | 2411 | } |
2370 | 2412 | ||
2371 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) | 2413 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) |
2372 | { | 2414 | { |
2373 | struct vm_struct *v = p; | 2415 | struct vm_struct *v = p; |
2374 | 2416 | ||
2375 | ++*pos; | 2417 | ++*pos; |
2376 | return v->next; | 2418 | return v->next; |
2377 | } | 2419 | } |
2378 | 2420 | ||
2379 | static void s_stop(struct seq_file *m, void *p) | 2421 | static void s_stop(struct seq_file *m, void *p) |
2380 | __releases(&vmlist_lock) | 2422 | __releases(&vmlist_lock) |
2381 | { | 2423 | { |
2382 | read_unlock(&vmlist_lock); | 2424 | read_unlock(&vmlist_lock); |
2383 | } | 2425 | } |
2384 | 2426 | ||
2385 | static void show_numa_info(struct seq_file *m, struct vm_struct *v) | 2427 | static void show_numa_info(struct seq_file *m, struct vm_struct *v) |
2386 | { | 2428 | { |
2387 | if (NUMA_BUILD) { | 2429 | if (NUMA_BUILD) { |
2388 | unsigned int nr, *counters = m->private; | 2430 | unsigned int nr, *counters = m->private; |
2389 | 2431 | ||
2390 | if (!counters) | 2432 | if (!counters) |
2391 | return; | 2433 | return; |
2392 | 2434 | ||
2393 | memset(counters, 0, nr_node_ids * sizeof(unsigned int)); | 2435 | memset(counters, 0, nr_node_ids * sizeof(unsigned int)); |
2394 | 2436 | ||
2395 | for (nr = 0; nr < v->nr_pages; nr++) | 2437 | for (nr = 0; nr < v->nr_pages; nr++) |
2396 | counters[page_to_nid(v->pages[nr])]++; | 2438 | counters[page_to_nid(v->pages[nr])]++; |
2397 | 2439 | ||
2398 | for_each_node_state(nr, N_HIGH_MEMORY) | 2440 | for_each_node_state(nr, N_HIGH_MEMORY) |
2399 | if (counters[nr]) | 2441 | if (counters[nr]) |
2400 | seq_printf(m, " N%u=%u", nr, counters[nr]); | 2442 | seq_printf(m, " N%u=%u", nr, counters[nr]); |
2401 | } | 2443 | } |
2402 | } | 2444 | } |
2403 | 2445 | ||
2404 | static int s_show(struct seq_file *m, void *p) | 2446 | static int s_show(struct seq_file *m, void *p) |
2405 | { | 2447 | { |
2406 | struct vm_struct *v = p; | 2448 | struct vm_struct *v = p; |
2407 | 2449 | ||
2408 | seq_printf(m, "0x%p-0x%p %7ld", | 2450 | seq_printf(m, "0x%p-0x%p %7ld", |
2409 | v->addr, v->addr + v->size, v->size); | 2451 | v->addr, v->addr + v->size, v->size); |
2410 | 2452 | ||
2411 | if (v->caller) { | 2453 | if (v->caller) { |
2412 | char buff[KSYM_SYMBOL_LEN]; | 2454 | char buff[KSYM_SYMBOL_LEN]; |
2413 | 2455 | ||
2414 | seq_putc(m, ' '); | 2456 | seq_putc(m, ' '); |
2415 | sprint_symbol(buff, (unsigned long)v->caller); | 2457 | sprint_symbol(buff, (unsigned long)v->caller); |
2416 | seq_puts(m, buff); | 2458 | seq_puts(m, buff); |
2417 | } | 2459 | } |
2418 | 2460 | ||
2419 | if (v->nr_pages) | 2461 | if (v->nr_pages) |
2420 | seq_printf(m, " pages=%d", v->nr_pages); | 2462 | seq_printf(m, " pages=%d", v->nr_pages); |
2421 | 2463 | ||
2422 | if (v->phys_addr) | 2464 | if (v->phys_addr) |
2423 | seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr); | 2465 | seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr); |
2424 | 2466 | ||
2425 | if (v->flags & VM_IOREMAP) | 2467 | if (v->flags & VM_IOREMAP) |
2426 | seq_printf(m, " ioremap"); | 2468 | seq_printf(m, " ioremap"); |
2427 | 2469 | ||
2428 | if (v->flags & VM_ALLOC) | 2470 | if (v->flags & VM_ALLOC) |
2429 | seq_printf(m, " vmalloc"); | 2471 | seq_printf(m, " vmalloc"); |
2430 | 2472 | ||
2431 | if (v->flags & VM_MAP) | 2473 | if (v->flags & VM_MAP) |
2432 | seq_printf(m, " vmap"); | 2474 | seq_printf(m, " vmap"); |
2433 | 2475 | ||
2434 | if (v->flags & VM_USERMAP) | 2476 | if (v->flags & VM_USERMAP) |
2435 | seq_printf(m, " user"); | 2477 | seq_printf(m, " user"); |
2436 | 2478 | ||
2437 | if (v->flags & VM_VPAGES) | 2479 | if (v->flags & VM_VPAGES) |
2438 | seq_printf(m, " vpages"); | 2480 | seq_printf(m, " vpages"); |
2439 | 2481 | ||
2440 | show_numa_info(m, v); | 2482 | show_numa_info(m, v); |
2441 | seq_putc(m, '\n'); | 2483 | seq_putc(m, '\n'); |
2442 | return 0; | 2484 | return 0; |
2443 | } | 2485 | } |
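Putting s_show() together, one /proc/vmallocinfo record comes out roughly as below (addresses, size and caller are purely illustrative; the trailing N0=4 is the per-node page count appended by show_numa_info() on NUMA builds):

	0xffffc90000035000-0xffffc9000003a000   20480 alloc_large_system_hash+0x14b/0x215 pages=4 vmalloc N0=4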
2444 | 2486 | ||
2445 | static const struct seq_operations vmalloc_op = { | 2487 | static const struct seq_operations vmalloc_op = { |
2446 | .start = s_start, | 2488 | .start = s_start, |
2447 | .next = s_next, | 2489 | .next = s_next, |
2448 | .stop = s_stop, | 2490 | .stop = s_stop, |
2449 | .show = s_show, | 2491 | .show = s_show, |
2450 | }; | 2492 | }; |
2451 | 2493 | ||
2452 | static int vmalloc_open(struct inode *inode, struct file *file) | 2494 | static int vmalloc_open(struct inode *inode, struct file *file) |
2453 | { | 2495 | { |
2454 | unsigned int *ptr = NULL; | 2496 | unsigned int *ptr = NULL; |
2455 | int ret; | 2497 | int ret; |
2456 | 2498 | ||
2457 | if (NUMA_BUILD) { | 2499 | if (NUMA_BUILD) { |
2458 | ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); | 2500 | ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); |
2459 | if (ptr == NULL) | 2501 | if (ptr == NULL) |
2460 | return -ENOMEM; | 2502 | return -ENOMEM; |
2461 | } | 2503 | } |
2462 | ret = seq_open(file, &vmalloc_op); | 2504 | ret = seq_open(file, &vmalloc_op); |
2463 | if (!ret) { | 2505 | if (!ret) { |
2464 | struct seq_file *m = file->private_data; | 2506 | struct seq_file *m = file->private_data; |
2465 | m->private = ptr; | 2507 | m->private = ptr; |
2466 | } else | 2508 | } else |
2467 | kfree(ptr); | 2509 | kfree(ptr); |
2468 | return ret; | 2510 | return ret; |
2469 | } | 2511 | } |
2470 | 2512 | ||
2471 | static const struct file_operations proc_vmalloc_operations = { | 2513 | static const struct file_operations proc_vmalloc_operations = { |
2472 | .open = vmalloc_open, | 2514 | .open = vmalloc_open, |
2473 | .read = seq_read, | 2515 | .read = seq_read, |
2474 | .llseek = seq_lseek, | 2516 | .llseek = seq_lseek, |
2475 | .release = seq_release_private, | 2517 | .release = seq_release_private, |
2476 | }; | 2518 | }; |
2477 | 2519 | ||
2478 | static int __init proc_vmalloc_init(void) | 2520 | static int __init proc_vmalloc_init(void) |
2479 | { | 2521 | { |
2480 | proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); | 2522 | proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); |
2481 | return 0; | 2523 | return 0; |
2482 | } | 2524 | } |
2483 | module_init(proc_vmalloc_init); | 2525 | module_init(proc_vmalloc_init); |
2484 | #endif | 2526 | #endif |
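A trivial userspace sketch (assumed, not part of this commit) that dumps the resulting file; since the entry is created with mode S_IRUSR, it normally needs to run as root:

	/* Dump /proc/vmallocinfo line by line to stdout. */
	#include <stdio.h>

	int main(void)
	{
		char line[512];
		FILE *f = fopen("/proc/vmallocinfo", "r");

		if (!f) {
			perror("/proc/vmallocinfo");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}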
2485 | 2527 |