Blame view

mm/vmalloc.c 89.4 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
8
  /*
   *  linux/mm/vmalloc.c
   *
   *  Copyright (C) 1993  Linus Torvalds
   *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
9
   *  Numa awareness, Christoph Lameter, SGI, June 2005
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
10
   */
db64fe022   Nick Piggin   mm: rewrite vmap ...
11
  #include <linux/vmalloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/highmem.h>
c3edc4010   Ingo Molnar   sched/headers: Mo...
15
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
18
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
19
  #include <linux/proc_fs.h>
a10aa5798   Christoph Lameter   vmalloc: show vma...
20
  #include <linux/seq_file.h>
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
21
  #include <linux/set_memory.h>
3ac7fe5a4   Thomas Gleixner   infrastructure to...
22
  #include <linux/debugobjects.h>
230169693   Christoph Lameter   vmallocinfo: add ...
23
  #include <linux/kallsyms.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
24
  #include <linux/list.h>
4da56b99d   Chris Wilson   mm/vmap: Add a no...
25
  #include <linux/notifier.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
26
27
28
  #include <linux/rbtree.h>
  #include <linux/radix-tree.h>
  #include <linux/rcupdate.h>
f0aa66179   Tejun Heo   vmalloc: implemen...
29
  #include <linux/pfn.h>
89219d37a   Catalin Marinas   kmemleak: Add the...
30
  #include <linux/kmemleak.h>
60063497a   Arun Sharma   atomic: use <linu...
31
  #include <linux/atomic.h>
3b32123d7   Gideon Israel Dsouza   mm: use macros fr...
32
  #include <linux/compiler.h>
32fcfd407   Al Viro   make vfree() safe...
33
  #include <linux/llist.h>
0f616be12   Toshi Kani   mm: change __get_...
34
  #include <linux/bitops.h>
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
35
  #include <linux/rbtree_augmented.h>
3b32123d7   Gideon Israel Dsouza   mm: use macros fr...
36

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
37
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38
  #include <asm/tlbflush.h>
2dca6999e   David Miller   mm, perf_event: M...
39
  #include <asm/shmparam.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40

dd56b0464   Mel Gorman   mm: page_alloc: h...
41
  #include "internal.h"
32fcfd407   Al Viro   make vfree() safe...
42
43
44
45
46
47
48
49
50
51
52
  /*
   * Per-CPU bookkeeping drained by free_work(): a lock-less list of
   * queued nodes and the work item whose handler walks that list.
   */
  struct vfree_deferred {
  	struct llist_head list;
  	struct work_struct wq;
  };
  static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
  
  /* Forward declaration; free_work() below needs it. */
  static void __vunmap(const void *, int);
  
  /*
   * Work handler for a struct vfree_deferred.
   *
   * Atomically detaches everything queued on the owner's list and passes
   * each node to __vunmap(). The llist_node pointer itself is used as the
   * address handed to __vunmap(), with the second argument set to 1.
   */
  static void free_work(struct work_struct *w)
  {
  	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
  	struct llist_node *t, *llnode;
  
  	/* _safe variant: the node is consumed by __vunmap() while iterating. */
  	llist_for_each_safe(llnode, t, llist_del_all(&p->list))
  		__vunmap((void *)llnode, 1);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
58
  /*** Page table manipulation functions ***/
b221385bc   Adrian Bunk   [PATCH] mm/: make...
59

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60
61
62
63
64
65
66
67
68
69
  /*
   * Clear every kernel PTE that maps [addr, end) below @pmd, one
   * PAGE_SIZE step at a time. Warns if a cleared entry was neither
   * empty nor present.
   */
  static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
  {
  	pte_t *ptep = pte_offset_kernel(pmd, addr);
  
  	for (;;) {
  		pte_t old = ptep_get_and_clear(&init_mm, addr, ptep);
  
  		WARN_ON(!(pte_none(old) || pte_present(old)));
  
  		ptep++;
  		addr += PAGE_SIZE;
  		if (addr == end)
  			break;
  	}
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
70
  /*
   * Walk the PMD entries under @pud that cover [addr, end).
   *
   * An entry is skipped when pmd_clear_huge() reports it handled the
   * entry, or when pmd_none_or_clear_bad() reports it empty/bad;
   * otherwise the walk descends into vunmap_pte_range().
   */
  static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
  {
  	pmd_t *pmd;
  	unsigned long next;
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		next = pmd_addr_end(addr, end);
  		if (pmd_clear_huge(pmd))
  			continue;
  		if (pmd_none_or_clear_bad(pmd))
  			continue;
  		vunmap_pte_range(pmd, addr, next);
  	} while (pmd++, addr = next, addr != end);
  }
c2febafc6   Kirill A. Shutemov   mm: convert gener...
85
  /*
   * Walk the PUD entries under @p4d that cover [addr, end).
   *
   * An entry is skipped when pud_clear_huge() reports it handled the
   * entry, or when pud_none_or_clear_bad() reports it empty/bad;
   * otherwise the walk descends into vunmap_pmd_range().
   */
  static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
  {
  	pud_t *pud;
  	unsigned long next;
  
  	pud = pud_offset(p4d, addr);
  	do {
  		next = pud_addr_end(addr, end);
  		if (pud_clear_huge(pud))
  			continue;
  		if (pud_none_or_clear_bad(pud))
  			continue;
  		vunmap_pmd_range(pud, addr, next);
  	} while (pud++, addr = next, addr != end);
  }
c2febafc6   Kirill A. Shutemov   mm: convert gener...
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
  /*
   * Walk the P4D entries under @pgd that cover [addr, end) and descend
   * into vunmap_pud_range() for each entry that is neither cleared as
   * huge by p4d_clear_huge() nor reported empty/bad by
   * p4d_none_or_clear_bad().
   */
  static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
  {
  	p4d_t *entry = p4d_offset(pgd, addr);
  	unsigned long boundary;
  
  	do {
  		boundary = p4d_addr_end(addr, end);
  
  		/* Order matters: try the huge clear first, then the none/bad test. */
  		if (!p4d_clear_huge(entry) && !p4d_none_or_clear_bad(entry))
  			vunmap_pud_range(entry, addr, boundary);
  
  		entry++;
  		addr = boundary;
  	} while (addr != end);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
114
  static void vunmap_page_range(unsigned long addr, unsigned long end)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
115
116
117
  {
  	pgd_t *pgd;
  	unsigned long next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
120
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
121
122
123
124
  	do {
  		next = pgd_addr_end(addr, end);
  		if (pgd_none_or_clear_bad(pgd))
  			continue;
c2febafc6   Kirill A. Shutemov   mm: convert gener...
125
  		vunmap_p4d_range(pgd, addr, next);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
126
  	} while (pgd++, addr = next, addr != end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
127
128
129
  }
  
  /*
   * Install PTEs for [addr, end) below @pmd, taking pages from
   * @pages starting at index *@nr.
   *
   * Returns 0 on success, -ENOMEM if the PTE table cannot be allocated
   * or a page pointer in @pages is NULL, and -EBUSY if a PTE in the
   * range is already populated. *@nr is advanced once per PTE written.
   */
  static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
  {
  	pte_t *pte;
  
  	/*
  	 * nr is a running index into the array which helps higher level
  	 * callers keep track of where we're up to.
  	 */
  
  	pte = pte_alloc_kernel(pmd, addr);
  	if (!pte)
  		return -ENOMEM;
  	do {
  		struct page *page = pages[*nr];
  
  		if (WARN_ON(!pte_none(*pte)))
  			return -EBUSY;
  		if (WARN_ON(!page))
  			return -ENOMEM;
  		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
  		(*nr)++;
  	} while (pte++, addr += PAGE_SIZE, addr != end);
  	return 0;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
152
153
  /*
   * Populate the PMD level for [addr, end) under @pud and map each
   * PMD-sized piece through vmap_pte_range().
   *
   * Returns 0 on success. Any failure, whether from pmd_alloc() or
   * from the lower level, is reported as -ENOMEM.
   */
  static int vmap_pmd_range(pud_t *pud, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
  {
  	pmd_t *pmd;
  	unsigned long next;
  
  	pmd = pmd_alloc(&init_mm, pud, addr);
  	if (!pmd)
  		return -ENOMEM;
  	do {
  		next = pmd_addr_end(addr, end);
  		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
  			return -ENOMEM;
  	} while (pmd++, addr = next, addr != end);
  	return 0;
  }
c2febafc6   Kirill A. Shutemov   mm: convert gener...
168
  /*
   * Populate the PUD level for [addr, end) under @p4d and map each
   * PUD-sized piece through vmap_pmd_range().
   *
   * Returns 0 on success. Any failure, whether from pud_alloc() or
   * from the lower level, is reported as -ENOMEM.
   */
  static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
  {
  	pud_t *pud;
  	unsigned long next;
  
  	pud = pud_alloc(&init_mm, p4d, addr);
  	if (!pud)
  		return -ENOMEM;
  	do {
  		next = pud_addr_end(addr, end);
  		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
  			return -ENOMEM;
  	} while (pud++, addr = next, addr != end);
  	return 0;
  }
c2febafc6   Kirill A. Shutemov   mm: convert gener...
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
  /*
   * Populate the P4D level for [addr, end) under @pgd and map each
   * P4D-sized piece through vmap_pud_range().
   *
   * Returns 0 on success; -ENOMEM if p4d_alloc() fails or the lower
   * level reports any error.
   */
  static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
  {
  	unsigned long boundary;
  	p4d_t *entry = p4d_alloc(&init_mm, pgd, addr);
  
  	if (!entry)
  		return -ENOMEM;
  
  	for (;;) {
  		boundary = p4d_addr_end(addr, end);
  		if (vmap_pud_range(entry, addr, boundary, prot, pages, nr))
  			return -ENOMEM;
  
  		entry++;
  		addr = boundary;
  		if (addr == end)
  			break;
  	}
  
  	return 0;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
199
200
201
202
203
204
  /*
   * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
   * will have pfns corresponding to the "pages" array.
   *
   * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
   */
8fc489850   Tejun Heo   vmalloc: add un/m...
205
206
  static int vmap_page_range_noflush(unsigned long start, unsigned long end,
  				   pgprot_t prot, struct page **pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
208
209
  {
  	pgd_t *pgd;
  	unsigned long next;
2e4e27c7d   Adam Lackorzynski   vmalloc.c: fix fl...
210
  	unsigned long addr = start;
db64fe022   Nick Piggin   mm: rewrite vmap ...
211
212
  	int err = 0;
  	int nr = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213
214
215
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
216
217
  	do {
  		next = pgd_addr_end(addr, end);
c2febafc6   Kirill A. Shutemov   mm: convert gener...
218
  		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219
  		if (err)
bf88c8c83   Figo.zhang   vmalloc.c: fix do...
220
  			return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
  	} while (pgd++, addr = next, addr != end);
db64fe022   Nick Piggin   mm: rewrite vmap ...
222

db64fe022   Nick Piggin   mm: rewrite vmap ...
223
  	return nr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
  }
8fc489850   Tejun Heo   vmalloc: add un/m...
225
226
227
228
229
230
231
232
233
  /*
   * Map @pages into [start, end) via vmap_page_range_noflush() and then
   * call flush_cache_vmap() on the range. The flush is issued whether or
   * not the mapping succeeded; the mapping result is returned unchanged.
   */
  static int vmap_page_range(unsigned long start, unsigned long end,
  			   pgprot_t prot, struct page **pages)
  {
  	int mapped = vmap_page_range_noflush(start, end, prot, pages);
  
  	flush_cache_vmap(start, end);
  
  	return mapped;
  }
81ac3ad90   KAMEZAWA Hiroyuki   kcore: register m...
234
  /*
   * Report whether @x lies in the module area or in vmalloc space.
   *
   * Returns 1 when CONFIG_MODULES and MODULES_VADDR are both defined and
   * @x falls in [MODULES_VADDR, MODULES_END); otherwise returns whatever
   * is_vmalloc_addr() says about @x.
   */
  int is_vmalloc_or_module_addr(const void *x)
  {
  	/*
  	 * ARM, x86-64 and sparc64 put modules in a special place,
  	 * and fall back on vmalloc() if that fails. Others
  	 * just put it in the vmalloc space.
  	 */
  #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
  	unsigned long addr = (unsigned long)x;
  	if (addr >= MODULES_VADDR && addr < MODULES_END)
  		return 1;
  #endif
  	return is_vmalloc_addr(x);
  }
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
248
  /*
add688fbd   malc   Revert "mm/vmallo...
249
   * Walk a vmap address to the struct page it maps.
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
250
   */
add688fbd   malc   Revert "mm/vmallo...
251
  struct page *vmalloc_to_page(const void *vmalloc_addr)
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
252
253
  {
  	unsigned long addr = (unsigned long) vmalloc_addr;
add688fbd   malc   Revert "mm/vmallo...
254
  	struct page *page = NULL;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
255
  	pgd_t *pgd = pgd_offset_k(addr);
c2febafc6   Kirill A. Shutemov   mm: convert gener...
256
257
258
259
  	p4d_t *p4d;
  	pud_t *pud;
  	pmd_t *pmd;
  	pte_t *ptep, pte;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
260

7aa413def   Ingo Molnar   x86, MM: virtual ...
261
262
263
264
  	/*
  	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
  	 * architectures that do not vmalloc module space
  	 */
73bdf0a60   Linus Torvalds   Introduce is_vmal...
265
  	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
59ea74633   Jiri Slaby   MM: virtual addre...
266

c2febafc6   Kirill A. Shutemov   mm: convert gener...
267
268
269
270
271
272
  	if (pgd_none(*pgd))
  		return NULL;
  	p4d = p4d_offset(pgd, addr);
  	if (p4d_none(*p4d))
  		return NULL;
  	pud = pud_offset(p4d, addr);
029c54b09   Ard Biesheuvel   mm/vmalloc.c: hug...
273
274
275
276
277
278
279
280
281
282
283
  
  	/*
  	 * Don't dereference bad PUD or PMD (below) entries. This will also
  	 * identify huge mappings, which we may encounter on architectures
  	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
  	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
  	 * not [unambiguously] associated with a struct page, so there is
  	 * no correct value to return for them.
  	 */
  	WARN_ON_ONCE(pud_bad(*pud));
  	if (pud_none(*pud) || pud_bad(*pud))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
284
285
  		return NULL;
  	pmd = pmd_offset(pud, addr);
029c54b09   Ard Biesheuvel   mm/vmalloc.c: hug...
286
287
  	WARN_ON_ONCE(pmd_bad(*pmd));
  	if (pmd_none(*pmd) || pmd_bad(*pmd))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
288
289
290
291
292
293
294
  		return NULL;
  
  	ptep = pte_offset_map(pmd, addr);
  	pte = *ptep;
  	if (pte_present(pte))
  		page = pte_page(pte);
  	pte_unmap(ptep);
add688fbd   malc   Revert "mm/vmallo...
295
  	return page;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
296
  }
add688fbd   malc   Revert "mm/vmallo...
297
  EXPORT_SYMBOL(vmalloc_to_page);
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
298
299
  
  /*
   * Map a vmalloc()-space virtual address to the physical page frame number.
   *
   * The result of vmalloc_to_page() is passed straight to page_to_pfn()
   * without a NULL check, so callers must pass an address that is
   * actually mapped.
   */
  unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
  {
  	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
  }
add688fbd   malc   Revert "mm/vmallo...
306
  EXPORT_SYMBOL(vmalloc_to_pfn);
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
307

db64fe022   Nick Piggin   mm: rewrite vmap ...
308
309
  
  /*** Global kva allocator ***/
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
310
  #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
311
  #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
312

db64fe022   Nick Piggin   mm: rewrite vmap ...
313

db64fe022   Nick Piggin   mm: rewrite vmap ...
314
  static DEFINE_SPINLOCK(vmap_area_lock);
f1c4069e1   Joonsoo Kim   mm, vmalloc: expo...
315
316
  /* Export for kexec only */
  LIST_HEAD(vmap_area_list);
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
317
  static LLIST_HEAD(vmap_purge_list);
89699605f   Nick Piggin   mm: vmap area cache
318
  static struct rb_root vmap_area_root = RB_ROOT;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
319
  static bool vmap_initialized __read_mostly;
89699605f   Nick Piggin   mm: vmap area cache
320

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
  /*
   * This kmem_cache is used for vmap_area objects. Instead of
   * allocating from slab we reuse an object from this cache to
   * make things faster, especially in the "no edge" splitting
   * of a free block.
   */
  static struct kmem_cache *vmap_area_cachep;
  
  /*
   * This linked list is used together with free_vmap_area_root.
   * It gives O(1) access to prev/next to perform fast coalescing.
   */
  static LIST_HEAD(free_vmap_area_list);
  
  /*
   * This augmented red-black tree represents the free vmap space.
   * All vmap_area objects in this tree are sorted by va->va_start
   * address. It is used for allocation and for merging when a vmap
   * object is released.
   *
   * Each vmap_area node records the maximum available free block
   * of its sub-tree, right or left. Therefore it is possible to
   * find the lowest match of a free area.
   */
  static struct rb_root free_vmap_area_root = RB_ROOT;
82dd23e84   Uladzislau Rezki (Sony)   mm/vmalloc.c: pre...
346
347
348
349
350
351
  /*
   * Preload a CPU with one object for the "no edge" split case. The
   * aim is to get rid of allocations from atomic context, and thus
   * to be able to use more permissive allocation masks.
   */
  static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
352
353
354
355
356
357
358
359
360
361
362
363
364
365
  /* Length of the address range described by @va (va_end - va_start). */
  static __always_inline unsigned long
  va_size(struct vmap_area *va)
  {
  	unsigned long first = va->va_start;
  	unsigned long past_last = va->va_end;
  
  	return past_last - first;
  }
  
  /*
   * Cached subtree_max_size of the vmap_area embedding @node,
   * or 0 when @node is NULL.
   */
  static __always_inline unsigned long
  get_subtree_max_size(struct rb_node *node)
  {
  	if (!node)
  		return 0;
  
  	return rb_entry(node, struct vmap_area, rb_node)->subtree_max_size;
  }
89699605f   Nick Piggin   mm: vmap area cache
366

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
367
368
369
370
371
372
373
374
375
376
  /*
   * Recompute the largest free block in the sub-tree rooted at @va:
   * the maximum of @va's own size and the cached maxima of its left
   * and right children.
   */
  static __always_inline unsigned long
  compute_subtree_max_size(struct vmap_area *va)
  {
  	unsigned long own = va_size(va);
  	unsigned long left = get_subtree_max_size(va->rb_node.rb_left);
  	unsigned long right = get_subtree_max_size(va->rb_node.rb_right);
  
  	return max3(own, left, right);
  }
315cc066b   Michel Lespinasse   augmented rbtree:...
377
378
  /*
   * Generate the static augment callbacks free_vmap_area_rb_augment_cb
   * for struct vmap_area: the augmented field is subtree_max_size and
   * the per-node value is computed by va_size().
   */
  RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
  	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
379
380
381
382
  
  static void purge_vmap_area_lazy(void);
  static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
  static unsigned long lazy_max_pages(void);
db64fe022   Nick Piggin   mm: rewrite vmap ...
383

97105f0ab   Roman Gushchin   mm: vmalloc: show...
384
385
386
387
388
389
  static atomic_long_t nr_vmalloc_pages;
  
  /* Snapshot of the nr_vmalloc_pages counter. */
  unsigned long vmalloc_nr_pages(void)
  {
  	long snapshot = atomic_long_read(&nr_vmalloc_pages);
  
  	return snapshot;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
390
  static struct vmap_area *__find_vmap_area(unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
391
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
392
393
394
395
396
397
398
399
  	struct rb_node *n = vmap_area_root.rb_node;
  
  	while (n) {
  		struct vmap_area *va;
  
  		va = rb_entry(n, struct vmap_area, rb_node);
  		if (addr < va->va_start)
  			n = n->rb_left;
cef2ac3f6   HATAYAMA Daisuke   vmalloc: make fin...
400
  		else if (addr >= va->va_end)
db64fe022   Nick Piggin   mm: rewrite vmap ...
401
402
403
404
405
406
407
  			n = n->rb_right;
  		else
  			return va;
  	}
  
  	return NULL;
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
  /*
   * This function returns back addresses of parent node
   * and its left or right link for further processing.
   */
  static __always_inline struct rb_node **
  find_va_links(struct vmap_area *va,
  	struct rb_root *root, struct rb_node *from,
  	struct rb_node **parent)
  {
  	struct vmap_area *tmp_va;
  	struct rb_node **link;
  
  	if (root) {
  		link = &root->rb_node;
  		if (unlikely(!*link)) {
  			*parent = NULL;
  			return link;
  		}
  	} else {
  		link = &from;
  	}
db64fe022   Nick Piggin   mm: rewrite vmap ...
429

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
430
431
432
433
434
435
436
  	/*
  	 * Go to the bottom of the tree. When we hit the last point
  	 * we end up with parent rb_node and correct direction, i name
  	 * it link, where the new va->rb_node will be attached to.
  	 */
  	do {
  		tmp_va = rb_entry(*link, struct vmap_area, rb_node);
db64fe022   Nick Piggin   mm: rewrite vmap ...
437

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
438
439
440
441
442
443
444
445
446
447
448
  		/*
  		 * During the traversal we also do some sanity check.
  		 * Trigger the BUG() if there are sides(left/right)
  		 * or full overlaps.
  		 */
  		if (va->va_start < tmp_va->va_end &&
  				va->va_end <= tmp_va->va_start)
  			link = &(*link)->rb_left;
  		else if (va->va_end > tmp_va->va_start &&
  				va->va_start >= tmp_va->va_end)
  			link = &(*link)->rb_right;
db64fe022   Nick Piggin   mm: rewrite vmap ...
449
450
  		else
  			BUG();
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
  	} while (*link);
  
  	*parent = &tmp_va->rb_node;
  	return link;
  }
  
  static __always_inline struct list_head *
  get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
  {
  	struct list_head *list;
  
  	if (unlikely(!parent))
  		/*
  		 * The red-black tree where we try to find VA neighbors
  		 * before merging or inserting is empty, i.e. it means
  		 * there is no free vmap space. Normally it does not
  		 * happen but we handle this case anyway.
  		 */
  		return NULL;
  
  	list = &rb_entry(parent, struct vmap_area, rb_node)->list;
  	return (&parent->rb_right == link ? list->next : list);
  }
  
  /*
   * Attach @va to @root at the position (@parent, @link) and add it to
   * the matching address-sorted list.
   *
   * When @parent is non-NULL the list position is derived from the
   * parent's list node and the passed @head is ignored; @head is only
   * used as the insertion point when @parent is NULL. The augmented
   * insert is used only for free_vmap_area_root.
   */
  static __always_inline void
  link_va(struct vmap_area *va, struct rb_root *root,
  	struct rb_node *parent, struct rb_node **link, struct list_head *head)
  {
  	/*
  	 * VA is still not in the list, but we can
  	 * identify its future previous list_head node.
  	 */
  	if (likely(parent)) {
  		head = &rb_entry(parent, struct vmap_area, rb_node)->list;
  		if (&parent->rb_right != link)
  			head = head->prev;
  	}
  
  	/* Insert to the rb-tree */
  	rb_link_node(&va->rb_node, parent, link);
  	if (root == &free_vmap_area_root) {
  		/*
  		 * Some explanation here. Just perform simple insertion
  		 * to the tree. We do not set va->subtree_max_size to
  		 * its current size before calling rb_insert_augmented().
  		 * That is because we populate the tree from the bottom
  		 * to the parent levels once the node _is_ in the tree.
  		 *
  		 * Therefore we set subtree_max_size to zero after insertion,
  		 * to let augment_tree_propagate_from() put everything in
  		 * the correct order later on.
  		 */
  		rb_insert_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  		va->subtree_max_size = 0;
  	} else {
  		rb_insert_color(&va->rb_node, root);
  	}
  
  	/* Address-sort this list */
  	list_add(&va->list, head);
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
512
513
514
  static __always_inline void
  unlink_va(struct vmap_area *va, struct rb_root *root)
  {
460e42d19   Uladzislau Rezki (Sony)   mm/vmalloc.c: swi...
515
516
  	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
  		return;
db64fe022   Nick Piggin   mm: rewrite vmap ...
517

460e42d19   Uladzislau Rezki (Sony)   mm/vmalloc.c: swi...
518
519
520
521
522
523
524
525
  	if (root == &free_vmap_area_root)
  		rb_erase_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  	else
  		rb_erase(&va->rb_node, root);
  
  	list_del(&va->list);
  	RB_CLEAR_NODE(&va->rb_node);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
526
  }
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
  /*
   * Debug helper: recursively verify the augmented data of the sub-tree
   * rooted at @n.
   *
   * For each node, follow the path along which subtree_max_size says the
   * biggest block lives and check that some node on that path really has
   * that size. A mismatch is reported with pr_emerg(); the walk then
   * continues into both children regardless.
   */
  static void
  augment_tree_propagate_check(struct rb_node *n)
  {
  	struct vmap_area *va;
  	struct rb_node *node;
  	unsigned long size;
  	bool found = false;
  
  	if (n == NULL)
  		return;
  
  	va = rb_entry(n, struct vmap_area, rb_node);
  	size = va->subtree_max_size;
  	node = n;
  
  	while (node) {
  		va = rb_entry(node, struct vmap_area, rb_node);
  
  		if (get_subtree_max_size(node->rb_left) == size) {
  			node = node->rb_left;
  		} else {
  			if (va_size(va) == size) {
  				found = true;
  				break;
  			}
  
  			node = node->rb_right;
  		}
  	}
  
  	if (!found) {
  		va = rb_entry(n, struct vmap_area, rb_node);
  		pr_emerg("tree is corrupted: %lu, %lu\n",
  			va_size(va), va->subtree_max_size);
  	}
  
  	augment_tree_propagate_check(n->rb_left);
  	augment_tree_propagate_check(n->rb_right);
  }
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
  /*
   * This function populates subtree_max_size from bottom to upper
   * levels starting from VA point. The propagation must be done
   * when VA size is modified by changing its va_start/va_end. Or
   * in case of newly inserting of VA to the tree.
   *
   * It means that augment_tree_propagate_from() must be called:
   * - After VA has been inserted to the tree(free path);
   * - After VA has been shrunk(allocation path);
   * - After VA has been increased(merging path).
   *
   * Please note that, it does not mean that upper parent nodes
   * and their subtree_max_size are recalculated all the time up
   * to the root node.
   *
   *       4--8
   *        /\
   *       /  \
   *      /    \
   *    2--2  8--8
   *
   * For example if we modify the node 4, shrinking it to 2, then
   * no any modification is required. If we shrink the node 2 to 1
   * its subtree_max_size is updated only, and set to 1. If we shrink
   * the node 8 to 6, then its subtree_max_size is set to 6 and parent
   * node becomes 4--6.
   */
  static __always_inline void
  augment_tree_propagate_from(struct vmap_area *va)
  {
  	struct rb_node *node = &va->rb_node;
  	unsigned long new_va_sub_max_size;
  
  	while (node) {
  		va = rb_entry(node, struct vmap_area, rb_node);
  		new_va_sub_max_size = compute_subtree_max_size(va);
  
  		/*
  		 * If the newly calculated maximum available size of the
  		 * subtree is equal to the current one, then it means that
  		 * the tree is propagated correctly. So we have to stop at
  		 * this point to save cycles.
  		 */
  		if (va->subtree_max_size == new_va_sub_max_size)
  			break;
  
  		va->subtree_max_size = new_va_sub_max_size;
  		node = rb_parent(&va->rb_node);
  	}
  
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
  	augment_tree_propagate_check(free_vmap_area_root.rb_node);
  #endif
  }
  
  /*
   * Insert @va into @root (searching from the tree root) and into the
   * address-sorted list; @head is forwarded to link_va().
   */
  static void
  insert_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct rb_node *parent;
  	struct rb_node **slot = find_va_links(va, root, NULL, &parent);
  
  	link_va(va, root, parent, slot, head);
  }
  
  /*
   * Insert @va and then refresh the augmented subtree_max_size values
   * upwards from it. The search for the insertion point starts at @from
   * when it is non-NULL, otherwise at the root of @root.
   */
  static void
  insert_vmap_area_augment(struct vmap_area *va,
  	struct rb_node *from, struct rb_root *root,
  	struct list_head *head)
  {
  	struct rb_node *parent;
  	struct rb_node **slot;
  
  	slot = from ? find_va_links(va, NULL, from, &parent)
  		    : find_va_links(va, root, NULL, &parent);
  
  	link_va(va, root, parent, slot, head);
  	augment_tree_propagate_from(va);
  }
  
  /*
   * Merge de-allocated chunk of VA memory with previous
   * and next free blocks. If coalesce is not done a new
   * free area is inserted. If VA has been merged, it is
   * freed.
   */
  static __always_inline void
  merge_or_add_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct vmap_area *sibling;
  	struct list_head *next;
  	struct rb_node **link;
  	struct rb_node *parent;
  	bool merged = false;
  
  	/*
  	 * Find a place in the tree where VA potentially will be
  	 * inserted, unless it is merged with its sibling/siblings.
  	 */
  	link = find_va_links(va, root, NULL, &parent);
  
  	/*
  	 * Get next node of VA to check if merging can be done.
  	 */
  	next = get_va_next_sibling(parent, link);
  	if (unlikely(next == NULL))
  		goto insert;
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<------VA------>|<-----Next----->|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next != head) {
  		sibling = list_entry(next, struct vmap_area, list);
  		if (sibling->va_start == va->va_end) {
  			sibling->va_start = va->va_start;
  
  			/* Check and update the tree if needed. */
  			augment_tree_propagate_from(sibling);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
  		}
  	}
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<-----Prev----->|<------VA------>|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next->prev != head) {
  		sibling = list_entry(next->prev, struct vmap_area, list);
  		if (sibling->va_end == va->va_start) {
  			sibling->va_end = va->va_end;
  
  			/* Check and update the tree if needed. */
  			augment_tree_propagate_from(sibling);
54f63d9d8   Uladzislau Rezki (Sony)   mm/vmalloc.c: get...
718
719
  			if (merged)
  				unlink_va(va, root);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
720
721
722
  
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
  			return;
  		}
  	}
  
  insert:
  	if (!merged) {
  		link_va(va, root, parent, link, head);
  		augment_tree_propagate_from(va);
  	}
  }
  
  /*
   * Check whether a request of @size bytes, aligned to @align and not
   * starting below @vstart, can be carved out of the free area @va.
   *
   * Returns true when the aligned range lies completely inside @va.
   */
  static __always_inline bool
  is_within_this_va(struct vmap_area *va, unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	/* The request may not start below either the area or "vstart". */
  	unsigned long lowest = (va->va_start > vstart) ? va->va_start : vstart;
  	unsigned long first = ALIGN(lowest, align);
  	unsigned long last = first + size;
  
  	/* A big size or alignment can wrap either value around. */
  	if (last < first || first < vstart)
  		return false;
  
  	return last <= va->va_end;
  }
  
  /*
   * Find the first free block(lowest start address) in the tree,
   * that will accomplish the request corresponding to passing
   * parameters.
   *
   * The walk starts at the root of free_vmap_area_root and uses
   * get_subtree_max_size() to skip sub-trees. Returns the matching
   * vmap_area, or NULL when no node passes is_within_this_va().
   */
  static __always_inline struct vmap_area *
  find_vmap_lowest_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;
  	struct rb_node *node;
  	unsigned long length;
  
  	/* Start from the root. */
  	node = free_vmap_area_root.rb_node;
  
  	/* Adjust the search size for alignment overhead. */
  	length = size + align - 1;
  
  	while (node) {
  		va = rb_entry(node, struct vmap_area, rb_node);
  
  		/*
  		 * Prefer the left sub-tree (lower addresses) as long as it
  		 * reports a big enough block and this node starts above
  		 * "vstart".
  		 */
  		if (get_subtree_max_size(node->rb_left) >= length &&
  				vstart < va->va_start) {
  			node = node->rb_left;
  		} else {
  			if (is_within_this_va(va, size, align, vstart))
  				return va;
  
  			/*
  			 * Does not make sense to go deeper towards the right
  			 * sub-tree if it does not have a free block that is
  			 * equal or bigger to the requested search length.
  			 */
  			if (get_subtree_max_size(node->rb_right) >= length) {
  				node = node->rb_right;
  				continue;
  			}
  
  			/*
  			 * OK. We roll back and find the first right sub-tree,
  			 * that will satisfy the search criteria. It can happen
  			 * only once due to "vstart" restriction.
  			 */
  			while ((node = rb_parent(node))) {
  				va = rb_entry(node, struct vmap_area, rb_node);
  				if (is_within_this_va(va, size, align, vstart))
  					return va;
  
  				if (get_subtree_max_size(node->rb_right) >= length &&
  						vstart <= va->va_start) {
  					/* Resume the outer walk from this right child. */
  					node = node->rb_right;
  					break;
  				}
  			}
  		}
  	}
  
  	/* The walk ran off the tree without finding a fitting area. */
  	return NULL;
  }
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  #include <linux/random.h>
  
  /*
   * Debug helper: scan free_vmap_area_list from its head and return the
   * first area that passes is_within_this_va(), or NULL if none does.
   */
  static struct vmap_area *
  find_vmap_lowest_linear_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *candidate;
  
  	list_for_each_entry(candidate, &free_vmap_area_list, list) {
  		if (is_within_this_va(candidate, size, align, vstart))
  			return candidate;
  	}
  
  	return NULL;
  }
  
  /*
   * Debug helper: pick a random "vstart" above VMALLOC_START and compare
   * the result of the tree lookup with the linear list lookup for the same
   * @size (alignment 1). A mismatch is reported with pr_emerg().
   */
  static void
  find_vmap_lowest_match_check(unsigned long size)
  {
  	struct vmap_area *va_1, *va_2;
  	unsigned long vstart;
  	unsigned int rnd;
  
  	get_random_bytes(&rnd, sizeof(rnd));
  	vstart = VMALLOC_START + rnd;
  
  	va_1 = find_vmap_lowest_match(size, 1, vstart);
  	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
  
  	/* t: tree result, l: linear result, v: the random start address. */
  	if (va_1 != va_2)
  		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
  			va_1, va_2, vstart);
  }
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
  /*
   * How a requested range relates to the free area it is carved from.
   */
  enum fit_type {
  	/* The range does not lie inside the area. */
  	NOTHING_FIT = 0,
  	/* Full fit: the range covers the whole area. */
  	FL_FIT_TYPE = 1,
  	/* Left edge fit: the range starts at the area start. */
  	LE_FIT_TYPE = 2,
  	/* Right edge fit: the range ends at the area end. */
  	RE_FIT_TYPE = 3,
  	/* No edge fit: the range touches neither edge. */
  	NE_FIT_TYPE = 4,
  };
  
  static __always_inline enum fit_type
  classify_va_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size)
  {
  	enum fit_type type;
  
  	/* Check if it is within VA. */
  	if (nva_start_addr < va->va_start ||
  			nva_start_addr + size > va->va_end)
  		return NOTHING_FIT;
  
  	/* Now classify. */
  	if (va->va_start == nva_start_addr) {
  		if (va->va_end == nva_start_addr + size)
  			type = FL_FIT_TYPE;
  		else
  			type = LE_FIT_TYPE;
  	} else if (va->va_end == nva_start_addr + size) {
  		type = RE_FIT_TYPE;
  	} else {
  		type = NE_FIT_TYPE;
  	}
  
  	return type;
  }
  
  static __always_inline int
  adjust_va_to_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size,
  	enum fit_type type)
  {
2c9292336   Arnd Bergmann   mm/vmalloc.c: avo...
890
  	struct vmap_area *lva = NULL;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
  
  	if (type == FL_FIT_TYPE) {
  		/*
  		 * No need to split VA, it fully fits.
  		 *
  		 * |               |
  		 * V      NVA      V
  		 * |---------------|
  		 */
  		unlink_va(va, &free_vmap_area_root);
  		kmem_cache_free(vmap_area_cachep, va);
  	} else if (type == LE_FIT_TYPE) {
  		/*
  		 * Split left edge of fit VA.
  		 *
  		 * |       |
  		 * V  NVA  V   R
  		 * |-------|-------|
  		 */
  		va->va_start += size;
  	} else if (type == RE_FIT_TYPE) {
  		/*
  		 * Split right edge of fit VA.
  		 *
  		 *         |       |
  		 *     L   V  NVA  V
  		 * |-------|-------|
  		 */
  		va->va_end = nva_start_addr;
  	} else if (type == NE_FIT_TYPE) {
  		/*
  		 * Split no edge of fit VA.
  		 *
  		 *     |       |
  		 *   L V  NVA  V R
  		 * |---|-------|---|
  		 */
82dd23e84   Uladzislau Rezki (Sony)   mm/vmalloc.c: pre...
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
  		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
  		if (unlikely(!lva)) {
  			/*
  			 * For percpu allocator we do not do any pre-allocation
  			 * and leave it as it is. The reason is it most likely
  			 * never ends up with NE_FIT_TYPE splitting. In case of
  			 * percpu allocations offsets and sizes are aligned to
  			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
  			 * are its main fitting cases.
  			 *
  			 * There are a few exceptions though, as an example it is
  			 * a first allocation (early boot up) when we have "one"
  			 * big free space that has to be split.
  			 */
  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!lva)
  				return -1;
  		}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
  
  		/*
  		 * Build the remainder.
  		 */
  		lva->va_start = va->va_start;
  		lva->va_end = nva_start_addr;
  
  		/*
  		 * Shrink this VA to remaining size.
  		 */
  		va->va_start = nva_start_addr + size;
  	} else {
  		return -1;
  	}
  
  	if (type != FL_FIT_TYPE) {
  		augment_tree_propagate_from(va);
2c9292336   Arnd Bergmann   mm/vmalloc.c: avo...
963
  		if (lva)	/* type == NE_FIT_TYPE */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
964
965
966
967
968
969
970
971
972
973
974
975
976
  			insert_vmap_area_augment(lva, &va->rb_node,
  				&free_vmap_area_root, &free_vmap_area_list);
  	}
  
  	return 0;
  }
  
  /*
   * Returns a start address of the newly allocated area, if success.
   * Otherwise a vend is returned that indicates failure.
   */
  static __always_inline unsigned long
  __alloc_vmap_area(unsigned long size, unsigned long align,
cacca6baf   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
977
  	unsigned long vstart, unsigned long vend)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
  {
  	unsigned long nva_start_addr;
  	struct vmap_area *va;
  	enum fit_type type;
  	int ret;
  
  	va = find_vmap_lowest_match(size, align, vstart);
  	if (unlikely(!va))
  		return vend;
  
  	if (va->va_start > vstart)
  		nva_start_addr = ALIGN(va->va_start, align);
  	else
  		nva_start_addr = ALIGN(vstart, align);
  
  	/* Check the "vend" restriction. */
  	if (nva_start_addr + size > vend)
  		return vend;
  
  	/* Classify what we have found. */
  	type = classify_va_fit_type(va, nva_start_addr, size);
  	if (WARN_ON_ONCE(type == NOTHING_FIT))
  		return vend;
  
  	/* Update the free vmap_area. */
  	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
  	if (ret)
  		return vend;
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
1006
1007
1008
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  	find_vmap_lowest_match_check(size);
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1009
1010
  	return nva_start_addr;
  }
4da56b99d   Chris Wilson   mm/vmap: Add a no...
1011

db64fe022   Nick Piggin   mm: rewrite vmap ...
1012
1013
1014
1015
1016
1017
1018
1019
1020
  /*
   * Allocate a region of KVA of the specified size and alignment, within the
   * vstart and vend.
   *
   * @size must be non-zero and page aligned, @align must be a power of two.
   *
   * Returns the new vmap_area, already inserted into the busy tree/list,
   * or an ERR_PTR():
   *   -EBUSY  if vmap is not initialized yet, or no address space could be
   *           found even after purging lazily freed areas;
   *   -ENOMEM if the vmap_area object itself could not be allocated.
   */
  static struct vmap_area *alloc_vmap_area(unsigned long size,
  				unsigned long align,
  				unsigned long vstart, unsigned long vend,
  				int node, gfp_t gfp_mask)
  {
  	struct vmap_area *va, *pva;
  	unsigned long addr;
  	int purged = 0;
  
  	BUG_ON(!size);
  	BUG_ON(offset_in_page(size));
  	BUG_ON(!is_power_of_2(align));
  
  	if (unlikely(!vmap_initialized))
  		return ERR_PTR(-EBUSY);
  
  	might_sleep();
  
  	va = kmem_cache_alloc_node(vmap_area_cachep,
  			gfp_mask & GFP_RECLAIM_MASK, node);
  	if (unlikely(!va))
  		return ERR_PTR(-ENOMEM);
  
  	/*
  	 * Only scan the relevant parts containing pointers to other objects
  	 * to avoid false negatives.
  	 */
  	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
  
  retry:
  	/*
  	 * Preload this CPU with one extra vmap_area object to ensure
  	 * that we have it available when fit type of free area is
  	 * NE_FIT_TYPE.
  	 *
  	 * The preload is done in non-atomic context, outside of the
  	 * spinlock. It uses the reclaim-related bits of the caller's
  	 * gfp_mask rather than a fixed GFP_KERNEL, so that restrictions
  	 * requested by the caller are honoured here too.
  	 *
  	 * Even if it fails we do not really care about that. Just proceed
  	 * as it is. "overflow" path will refill the cache we allocate from.
  	 */
  	preempt_disable();
  	if (!__this_cpu_read(ne_fit_preload_node)) {
  		preempt_enable();
  		pva = kmem_cache_alloc_node(vmap_area_cachep,
  				gfp_mask & GFP_RECLAIM_MASK, node);
  		preempt_disable();
  
  		/* Somebody preloaded this CPU meanwhile: drop our object. */
  		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) {
  			if (pva)
  				kmem_cache_free(vmap_area_cachep, pva);
  		}
  	}
  
  	spin_lock(&vmap_area_lock);
  	preempt_enable();
  
  	/*
  	 * If an allocation fails, the "vend" address is
  	 * returned. Therefore trigger the overflow path.
  	 */
  	addr = __alloc_vmap_area(size, align, vstart, vend);
  	if (unlikely(addr == vend))
  		goto overflow;
  
  	va->va_start = addr;
  	va->va_end = addr + size;
  	va->vm = NULL;
  	insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
  
  	spin_unlock(&vmap_area_lock);
  
  	BUG_ON(!IS_ALIGNED(va->va_start, align));
  	BUG_ON(va->va_start < vstart);
  	BUG_ON(va->va_end > vend);
  
  	return va;
  
  overflow:
  	spin_unlock(&vmap_area_lock);
  
  	/* First retry: return the lazily freed areas to the free tree. */
  	if (!purged) {
  		purge_vmap_area_lazy();
  		purged = 1;
  		goto retry;
  	}
  
  	/* Then ask registered notifiers to release vmap space. */
  	if (gfpflags_allow_blocking(gfp_mask)) {
  		unsigned long freed = 0;
  		blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
  		if (freed > 0) {
  			purged = 0;
  			goto retry;
  		}
  	}
  
  	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
  		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
  			size);
  
  	kmem_cache_free(vmap_area_cachep, va);
  	return ERR_PTR(-EBUSY);
  }
4da56b99d   Chris Wilson   mm/vmap: Add a no...
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
  /*
   * Add @nb to vmap_notify_list, the blocking notifier chain that
   * alloc_vmap_area() calls when it fails to find address space.
   *
   * Returns the result of blocking_notifier_chain_register().
   */
  int register_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_register(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
  
  /*
   * Remove @nb from the vmap_notify_list blocking notifier chain.
   *
   * Returns the result of blocking_notifier_chain_unregister().
   */
  int unregister_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1121
1122
  static void __free_vmap_area(struct vmap_area *va)
  {
ca23e405e   Tejun Heo   vmalloc: implemen...
1123
  	/*
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1124
  	 * Remove from the busy tree/list.
ca23e405e   Tejun Heo   vmalloc: implemen...
1125
  	 */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1126
  	unlink_va(va, &vmap_area_root);
ca23e405e   Tejun Heo   vmalloc: implemen...
1127

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1128
1129
1130
1131
1132
  	/*
  	 * Merge VA with its neighbors, otherwise just add it.
  	 */
  	merge_or_add_vmap_area(va,
  		&free_vmap_area_root, &free_vmap_area_list);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
  }
  
  /*
   * Free a region of KVA allocated by alloc_vmap_area
   *
   * Takes vmap_area_lock around __free_vmap_area().
   */
  static void free_vmap_area(struct vmap_area *va)
  {
  	spin_lock(&vmap_area_lock);
  	__free_vmap_area(va);
  	spin_unlock(&vmap_area_lock);
  }
  
  /*
   * Clear the pagetable entries of a given vmap_area
   *
   * The range passed to vunmap_page_range() is [va->va_start, va->va_end).
   */
  static void unmap_vmap_area(struct vmap_area *va)
  {
  	vunmap_page_range(va->va_start, va->va_end);
  }
  
  /*
   * lazy_max_pages is the maximum amount of virtual address space we gather up
   * before attempting to purge with a TLB flush.
   *
   * There is a tradeoff here: a larger number will cover more kernel page tables
   * and take slightly longer to purge, but it will linearly reduce the number of
   * global TLB flushes that must be performed. It would seem natural to scale
   * this number up linearly with the number of CPUs (because vmapping activity
   * could also scale linearly with the number of CPUs), however it is likely
   * that in practice, workloads might be constrained in other ways that mean
   * vmap activity will not scale linearly with CPUs. Also, I want to be
   * conservative and not introduce a big latency on huge systems, so go with
   * a less aggressive log scale. It will still be an improvement over the old
   * code, and it will be simple to change the scale factor if we find that it
   * becomes a problem on bigger systems.
   */
  static unsigned long lazy_max_pages(void)
  {
  	/* Number of pages in 32MB of address space. */
  	const unsigned long pages_per_step = 32UL * 1024 * 1024 / PAGE_SIZE;
  	/* Log-scale multiplier derived from the online CPU count. */
  	unsigned int steps = fls(num_online_cpus());
  
  	return steps * pages_per_step;
  }
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1177
  static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1178

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1179
1180
1181
1182
1183
  /*
   * Serialize vmap purging.  There is no actual critical section protected
   * by this lock, but we want to avoid concurrent calls for performance
   * reasons and to make the pcpu_get_vm_areas more deterministic.
   */
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1184
  static DEFINE_MUTEX(vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1185

02b709df8   Nick Piggin   mm: purge fragmen...
1186
1187
  /* for per-CPU blocks */
  static void purge_fragmented_blocks_allcpus(void);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1188
  /*
3ee48b6af   Cliff Wickman   mm, x86: Saving v...
1189
1190
1191
1192
1193
   * called before a call to iounmap() if the caller wants vm_area_struct's
   * immediately freed.
   */
  void set_iounmap_nonlazy(void)
  {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1194
  	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
3ee48b6af   Cliff Wickman   mm, x86: Saving v...
1195
1196
1197
  }
  
  /*
db64fe022   Nick Piggin   mm: rewrite vmap ...
1198
   * Purges all lazily-freed vmap areas.
db64fe022   Nick Piggin   mm: rewrite vmap ...
1199
   */
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1200
  static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1201
  {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1202
  	unsigned long resched_threshold;
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1203
  	struct llist_node *valist;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1204
  	struct vmap_area *va;
cbb766766   Vegard Nossum   mm: fix lazy vmap...
1205
  	struct vmap_area *n_va;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1206

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1207
  	lockdep_assert_held(&vmap_purge_lock);
02b709df8   Nick Piggin   mm: purge fragmen...
1208

80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1209
  	valist = llist_del_all(&vmap_purge_list);
68571be99   Uladzislau Rezki (Sony)   mm/vmalloc.c: add...
1210
1211
1212
1213
  	if (unlikely(valist == NULL))
  		return false;
  
  	/*
3f8fd02b1   Joerg Roedel   mm/vmalloc: Sync ...
1214
1215
1216
1217
1218
1219
  	 * First make sure the mappings are removed from all page-tables
  	 * before they are freed.
  	 */
  	vmalloc_sync_all();
  
  	/*
68571be99   Uladzislau Rezki (Sony)   mm/vmalloc.c: add...
1220
1221
1222
  	 * TODO: to calculate a flush range without looping.
  	 * The list can be up to lazy_max_pages() elements.
  	 */
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1223
  	llist_for_each_entry(va, valist, purge_list) {
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1224
1225
1226
1227
  		if (va->va_start < start)
  			start = va->va_start;
  		if (va->va_end > end)
  			end = va->va_end;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1228
  	}
db64fe022   Nick Piggin   mm: rewrite vmap ...
1229

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1230
  	flush_tlb_kernel_range(start, end);
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1231
  	resched_threshold = lazy_max_pages() << 1;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1232

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1233
  	spin_lock(&vmap_area_lock);
763b218dd   Joel Fernandes   mm: add preempt p...
1234
  	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1235
  		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
763b218dd   Joel Fernandes   mm: add preempt p...
1236

dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
1237
1238
1239
1240
1241
1242
1243
  		/*
  		 * Finally insert or merge lazily-freed area. It is
  		 * detached and there is no need to "unlink" it from
  		 * anything.
  		 */
  		merge_or_add_vmap_area(va,
  			&free_vmap_area_root, &free_vmap_area_list);
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1244
  		atomic_long_sub(nr, &vmap_lazy_nr);
68571be99   Uladzislau Rezki (Sony)   mm/vmalloc.c: add...
1245

4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1246
  		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
68571be99   Uladzislau Rezki (Sony)   mm/vmalloc.c: add...
1247
  			cond_resched_lock(&vmap_area_lock);
763b218dd   Joel Fernandes   mm: add preempt p...
1248
  	}
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1249
1250
  	spin_unlock(&vmap_area_lock);
  	return true;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1251
1252
1253
  }
  
  /*
496850e5f   Nick Piggin   mm: vmalloc failu...
1254
1255
1256
1257
1258
   * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
   * is already purging.
   */
  static void try_purge_vmap_area_lazy(void)
  {
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1259
  	if (mutex_trylock(&vmap_purge_lock)) {
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1260
  		__purge_vmap_area_lazy(ULONG_MAX, 0);
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1261
  		mutex_unlock(&vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1262
  	}
496850e5f   Nick Piggin   mm: vmalloc failu...
1263
1264
1265
  }
  
  /*
db64fe022   Nick Piggin   mm: rewrite vmap ...
1266
1267
1268
1269
   * Kick off a purge of the outstanding lazy areas.
   */
  static void purge_vmap_area_lazy(void)
  {
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1270
  	mutex_lock(&vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1271
1272
  	purge_fragmented_blocks_allcpus();
  	__purge_vmap_area_lazy(ULONG_MAX, 0);
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1273
  	mutex_unlock(&vmap_purge_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1274
1275
1276
  }
  
  /*
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1277
1278
1279
   * Free a vmap area, caller ensuring that the area has been unmapped
   * and flush_cache_vunmap had been called for the correct range
   * previously.
db64fe022   Nick Piggin   mm: rewrite vmap ...
1280
   */
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1281
  static void free_vmap_area_noflush(struct vmap_area *va)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1282
  {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1283
  	unsigned long nr_lazy;
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1284

dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
1285
1286
1287
  	spin_lock(&vmap_area_lock);
  	unlink_va(va, &vmap_area_root);
  	spin_unlock(&vmap_area_lock);
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1288
1289
  	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
  				PAGE_SHIFT, &vmap_lazy_nr);
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1290
1291
1292
1293
1294
  
  	/* After this point, we may free va at any time */
  	llist_add(&va->purge_list, &vmap_purge_list);
  
  	if (unlikely(nr_lazy > lazy_max_pages()))
496850e5f   Nick Piggin   mm: vmalloc failu...
1295
  		try_purge_vmap_area_lazy();
db64fe022   Nick Piggin   mm: rewrite vmap ...
1296
  }
b29acbdcf   Nick Piggin   mm: vmalloc fix l...
1297
1298
1299
1300
1301
1302
  /*
   * Free and unmap a vmap area
   *
   * Flushes the cache for the range, clears its page table entries and
   * hands the area to free_vmap_area_noflush().
   */
  static void free_unmap_vmap_area(struct vmap_area *va)
  {
  	flush_cache_vunmap(va->va_start, va->va_end);
  	unmap_vmap_area(va);
  
  	/* With debug_pagealloc enabled the TLB is flushed right away. */
  	if (debug_pagealloc_enabled_static())
  		flush_tlb_kernel_range(va->va_start, va->va_end);
  
  	free_vmap_area_noflush(va);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
  /*
   * Locked wrapper around __find_vmap_area(): performs the lookup for @addr
   * with vmap_area_lock held and returns whatever __find_vmap_area() found.
   * The lock is dropped before returning.
   */
  static struct vmap_area *find_vmap_area(unsigned long addr)
  {
  	struct vmap_area *va;
  
  	spin_lock(&vmap_area_lock);
  	va = __find_vmap_area(addr);
  	spin_unlock(&vmap_area_lock);
  
  	return va;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
  /*** Per cpu kva allocator ***/
  
  /*
   * vmap space is limited especially on 32 bit architectures. Ensure there is
   * room for at least 16 percpu vmap blocks per CPU.
   */
  /*
   * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
   * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
   * instead (we just need a rough idea)
   */
  #if BITS_PER_LONG == 32
  #define VMALLOC_SPACE		(128UL*1024*1024)
  #else
  #define VMALLOC_SPACE		(128UL*1024*1024*1024)
  #endif
  
  #define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
  #define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
  #define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
  #define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
  #define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
  #define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
  /*
   * Pages per vmap block: 1/16 of the per-CPU share of VMALLOC_PAGES,
   * clamped to [VMAP_BBMAP_BITS_MIN, VMAP_BBMAP_BITS_MAX].
   */
  #define VMAP_BBMAP_BITS		\
  		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
  		VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
  			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
  
  #define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
  
  /* Per-CPU queue of vmap blocks (see the vmap_block_queue per-CPU variable). */
  struct vmap_block_queue {
  	spinlock_t lock;	/* taken when adding a block to "free" */
  	struct list_head free;	/* blocks linked through vmap_block.free_list */
  };
  
  struct vmap_block {
  	spinlock_t lock;
  	struct vmap_area *va;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1356
  	unsigned long free, dirty;
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1357
  	unsigned long dirty_min, dirty_max; /*< dirty range */
de5604231   Nick Piggin   mm: percpu-vmap f...
1358
1359
  	struct list_head free_list;
  	struct rcu_head rcu_head;
02b709df8   Nick Piggin   mm: purge fragmen...
1360
  	struct list_head purge;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
  };
  
  /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
  static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
  
  /*
   * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
   * in the free path. Could get rid of this if we change the API to return a
   * "cookie" from alloc, to be passed to free. But no big deal yet.
   */
  static DEFINE_SPINLOCK(vmap_block_tree_lock);
  static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
  
  /*
   * We should probably have a fallback mechanism to allocate virtual memory
   * out of partially filled vmap blocks. However vmap block sizing should be
   * fairly reasonable according to the vmalloc size, so it shouldn't be a
   * big problem.
   */
  
  /*
   * Convert an address into the index used for the vmap_block_tree radix
   * tree: the number of VMAP_BLOCK_SIZE units between @addr and
   * VMALLOC_START rounded down to a VMAP_BLOCK_SIZE boundary.
   */
  static unsigned long addr_to_vb_idx(unsigned long addr)
  {
  	unsigned long base = VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
  
  	return (addr - base) / VMAP_BLOCK_SIZE;
  }
cf725ce27   Roman Pen   mm/vmalloc: occup...
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
  /*
   * Return the virtual address that lies @pages_off pages after @va_start.
   * The result must map to the same vmap block index as @va_start,
   * otherwise BUG_ON() fires.
   */
  static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
  {
  	unsigned long vaddr = va_start + (pages_off << PAGE_SHIFT);
  
  	BUG_ON(addr_to_vb_idx(vaddr) != addr_to_vb_idx(va_start));
  
  	return (void *)vaddr;
  }
  
  /**
   * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
   *                  block. Of course pages number can't exceed VMAP_BBMAP_BITS
   * @order:    how many 2^order pages should be occupied in newly allocated block
   * @gfp_mask: flags for the page level allocator
   *
   * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
   */
  static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
  	struct vmap_area *va;
  	unsigned long vb_idx;
  	int node, err;
  	void *vaddr;
  
  	node = numa_node_id();
  
  	vb = kmalloc_node(sizeof(struct vmap_block),
  			gfp_mask & GFP_RECLAIM_MASK, node);
  	if (unlikely(!vb))
  		return ERR_PTR(-ENOMEM);
  
  	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
  					VMALLOC_START, VMALLOC_END,
  					node, gfp_mask);
  	if (IS_ERR(va)) {
  		kfree(vb);
  		return ERR_CAST(va);
  	}
  
  	err = radix_tree_preload(gfp_mask);
  	if (unlikely(err)) {
  		kfree(vb);
  		free_vmap_area(va);
  		return ERR_PTR(err);
  	}
  
  	/* The caller gets the very first pages of the new block. */
  	vaddr = vmap_block_vaddr(va->va_start, 0);
  	spin_lock_init(&vb->lock);
  	vb->va = va;
  	/* At least something should be left free */
  	BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
  	vb->free = VMAP_BBMAP_BITS - (1UL << order);
  	vb->dirty = 0;
  	/* Empty dirty range: min above max. */
  	vb->dirty_min = VMAP_BBMAP_BITS;
  	vb->dirty_max = 0;
  	INIT_LIST_HEAD(&vb->free_list);
  
  	/* Make the block findable by address. */
  	vb_idx = addr_to_vb_idx(va->va_start);
  	spin_lock(&vmap_block_tree_lock);
  	err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
  	spin_unlock(&vmap_block_tree_lock);
  	BUG_ON(err);
  	radix_tree_preload_end();
  
  	/* Queue the block on this CPU's free list. */
  	vbq = &get_cpu_var(vmap_block_queue);
  	spin_lock(&vbq->lock);
  	list_add_tail_rcu(&vb->free_list, &vbq->free);
  	spin_unlock(&vbq->lock);
  	put_cpu_var(vmap_block_queue);
  
  	return vaddr;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1460
1461
1462
1463
  static void free_vmap_block(struct vmap_block *vb)
  {
  	struct vmap_block *tmp;
  	unsigned long vb_idx;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1464
1465
1466
1467
1468
  	vb_idx = addr_to_vb_idx(vb->va->va_start);
  	spin_lock(&vmap_block_tree_lock);
  	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
  	spin_unlock(&vmap_block_tree_lock);
  	BUG_ON(tmp != vb);
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1469
  	free_vmap_area_noflush(vb->va);
22a3c7d18   Lai Jiangshan   vmalloc,rcu: Conv...
1470
  	kfree_rcu(vb, rcu_head);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1471
  }
02b709df8   Nick Piggin   mm: purge fragmen...
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
  static void purge_fragmented_blocks(int cpu)
  {
  	LIST_HEAD(purge);
  	struct vmap_block *vb;
  	struct vmap_block *n_vb;
  	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  
  	rcu_read_lock();
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  
  		if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
  			continue;
  
  		spin_lock(&vb->lock);
  		if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
  			vb->free = 0; /* prevent further allocs after releasing lock */
  			vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1489
1490
  			vb->dirty_min = 0;
  			vb->dirty_max = VMAP_BBMAP_BITS;
02b709df8   Nick Piggin   mm: purge fragmen...
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
  			spin_lock(&vbq->lock);
  			list_del_rcu(&vb->free_list);
  			spin_unlock(&vbq->lock);
  			spin_unlock(&vb->lock);
  			list_add_tail(&vb->purge, &purge);
  		} else
  			spin_unlock(&vb->lock);
  	}
  	rcu_read_unlock();
  
  	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
  		list_del(&vb->purge);
  		free_vmap_block(vb);
  	}
  }
02b709df8   Nick Piggin   mm: purge fragmen...
1506
1507
1508
1509
1510
1511
1512
  static void purge_fragmented_blocks_allcpus(void)
  {
  	int cpu;
  
  	for_each_possible_cpu(cpu)
  		purge_fragmented_blocks(cpu);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1513
1514
1515
1516
  /*
   * vb_alloc - carve a virtual address range out of a per-CPU vmap block
   * @size:     size of the range in bytes; must be page aligned, non-zero
   *            and no larger than PAGE_SIZE * VMAP_MAX_ALLOC
   * @gfp_mask: allocation flags handed to new_vmap_block()
   *
   * The free list of the current CPU's vmap_block_queue is searched for a
   * block that still has 1 << get_order(@size) pages free.  When no such
   * block exists a new one is set up with new_vmap_block().
   *
   * Return: the start address of the range on success, or an ERR_PTR()
   * value on failure.  A zero @size is rejected with -EINVAL so that every
   * failure is reported the same way as a failure of new_vmap_block().
   */
  static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
  	void *vaddr = NULL;
  	unsigned int order;
  
  	BUG_ON(offset_in_page(size));
  	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  	if (WARN_ON(size == 0)) {
  		/*
  		 * Allocating 0 bytes isn't what caller wants since
  		 * get_order(0) returns funny result. Just warn and terminate
  		 * early.  Report it as an error pointer: vm_map_ram() tests
  		 * the result with IS_ERR() only, so a NULL return would be
  		 * taken for a successful allocation at address 0.
  		 */
  		return ERR_PTR(-EINVAL);
  	}
  	order = get_order(size);
  
  	rcu_read_lock();
  	vbq = &get_cpu_var(vmap_block_queue);
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  		unsigned long pages_off;
  
  		spin_lock(&vb->lock);
  		if (vb->free < (1UL << order)) {
  			/* Not enough room left in this block, try the next. */
  			spin_unlock(&vb->lock);
  			continue;
  		}
  
  		/* Hand out the pages that follow the already used ones. */
  		pages_off = VMAP_BBMAP_BITS - vb->free;
  		vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
  		vb->free -= 1UL << order;
  		if (vb->free == 0) {
  			/* Block is exhausted: take it off the free list. */
  			spin_lock(&vbq->lock);
  			list_del_rcu(&vb->free_list);
  			spin_unlock(&vbq->lock);
  		}
  
  		spin_unlock(&vb->lock);
  		break;
  	}
  
  	put_cpu_var(vmap_block_queue);
  	rcu_read_unlock();
  
  	/* Allocate new block if nothing was found */
  	if (!vaddr)
  		vaddr = new_vmap_block(order, gfp_mask);
  
  	return vaddr;
  }
  
  static void vb_free(const void *addr, unsigned long size)
  {
  	unsigned long offset;
  	unsigned long vb_idx;
  	unsigned int order;
  	struct vmap_block *vb;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
1569
  	BUG_ON(offset_in_page(size));
db64fe022   Nick Piggin   mm: rewrite vmap ...
1570
  	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
b29acbdcf   Nick Piggin   mm: vmalloc fix l...
1571
1572
  
  	flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1573
1574
1575
  	order = get_order(size);
  
  	offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1576
  	offset >>= PAGE_SHIFT;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1577
1578
1579
1580
1581
1582
  
  	vb_idx = addr_to_vb_idx((unsigned long)addr);
  	rcu_read_lock();
  	vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
  	rcu_read_unlock();
  	BUG_ON(!vb);
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1583
  	vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
d30dce351   Vlastimil Babka   mm, debug_pageall...
1584
  	if (debug_pagealloc_enabled_static())
82a2e924f   Chintan Pandya   mm: vmalloc: clea...
1585
1586
  		flush_tlb_kernel_range((unsigned long)addr,
  					(unsigned long)addr + size);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1587
  	spin_lock(&vb->lock);
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1588
1589
1590
1591
  
  	/* Expand dirty range */
  	vb->dirty_min = min(vb->dirty_min, offset);
  	vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
d086817dc   MinChan Kim   vmap: remove need...
1592

db64fe022   Nick Piggin   mm: rewrite vmap ...
1593
1594
  	vb->dirty += 1UL << order;
  	if (vb->dirty == VMAP_BBMAP_BITS) {
de5604231   Nick Piggin   mm: percpu-vmap f...
1595
  		BUG_ON(vb->free);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1596
1597
1598
1599
1600
  		spin_unlock(&vb->lock);
  		free_vmap_block(vb);
  	} else
  		spin_unlock(&vb->lock);
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
1601
  static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1602
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
1603
  	int cpu;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1604

9b4633340   Jeremy Fitzhardinge   vmap: cope with v...
1605
1606
  	if (unlikely(!vmap_initialized))
  		return;
5803ed292   Christoph Hellwig   mm: mark all call...
1607
  	might_sleep();
db64fe022   Nick Piggin   mm: rewrite vmap ...
1608
1609
1610
1611
1612
1613
  	for_each_possible_cpu(cpu) {
  		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  		struct vmap_block *vb;
  
  		rcu_read_lock();
  		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
db64fe022   Nick Piggin   mm: rewrite vmap ...
1614
  			spin_lock(&vb->lock);
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1615
1616
  			if (vb->dirty) {
  				unsigned long va_start = vb->va->va_start;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1617
  				unsigned long s, e;
b136be5e0   Joonsoo Kim   mm, vmalloc: use ...
1618

7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1619
1620
  				s = va_start + (vb->dirty_min << PAGE_SHIFT);
  				e = va_start + (vb->dirty_max << PAGE_SHIFT);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1621

7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1622
1623
  				start = min(s, start);
  				end   = max(e, end);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1624

7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1625
  				flush = 1;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1626
1627
1628
1629
1630
  			}
  			spin_unlock(&vb->lock);
  		}
  		rcu_read_unlock();
  	}
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1631
  	mutex_lock(&vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1632
1633
1634
  	purge_fragmented_blocks_allcpus();
  	if (!__purge_vmap_area_lazy(start, end) && flush)
  		flush_tlb_kernel_range(start, end);
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1635
  	mutex_unlock(&vmap_purge_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1636
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
  
  /**
   * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
   *
   * Kernel virtual mappings are flushed lazily by the vmap/vmalloc layer,
   * mainly to amortize the cost of TLB flushes.  As a consequence a page
   * you hold now may, earlier in its life, have been mapped at a kernel
   * virtual address by the vmap layer, and some CPUs may still have TLB
   * entries referencing it (in addition to the regular 1:1 kernel mapping).
   *
   * vm_unmap_aliases flushes all such lazy mappings.  Once it has returned,
   * none of the pages we have control over has any alias left from the
   * vmap layer.
   */
  void vm_unmap_aliases(void)
  {
  	/* Empty range and no forced flush: the helper widens both itself. */
  	_vm_unmap_aliases(ULONG_MAX, 0, 0);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1658
1659
1660
1661
1662
1663
1664
1665
1666
  EXPORT_SYMBOL_GPL(vm_unmap_aliases);
  
  /**
   * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
   * @mem: the pointer returned by vm_map_ram
   * @count: the count passed to that vm_map_ram call (cannot unmap partial)
   */
  void vm_unmap_ram(const void *mem, unsigned int count)
  {
65ee03c4b   Guillermo Julián Moreno   mm: fix overflow ...
1667
  	unsigned long size = (unsigned long)count << PAGE_SHIFT;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1668
  	unsigned long addr = (unsigned long)mem;
9c3acf604   Christoph Hellwig   mm: remove free_u...
1669
  	struct vmap_area *va;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1670

5803ed292   Christoph Hellwig   mm: mark all call...
1671
  	might_sleep();
db64fe022   Nick Piggin   mm: rewrite vmap ...
1672
1673
1674
  	BUG_ON(!addr);
  	BUG_ON(addr < VMALLOC_START);
  	BUG_ON(addr > VMALLOC_END);
a1c0b1a07   Shawn Lin   mm/vmalloc: use P...
1675
  	BUG_ON(!PAGE_ALIGNED(addr));
db64fe022   Nick Piggin   mm: rewrite vmap ...
1676

9c3acf604   Christoph Hellwig   mm: remove free_u...
1677
  	if (likely(count <= VMAP_MAX_ALLOC)) {
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
1678
  		debug_check_no_locks_freed(mem, size);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1679
  		vb_free(mem, size);
9c3acf604   Christoph Hellwig   mm: remove free_u...
1680
1681
1682
1683
1684
  		return;
  	}
  
  	va = find_vmap_area(addr);
  	BUG_ON(!va);
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
1685
1686
  	debug_check_no_locks_freed((void *)va->va_start,
  				    (va->va_end - va->va_start));
9c3acf604   Christoph Hellwig   mm: remove free_u...
1687
  	free_unmap_vmap_area(va);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1688
1689
1690
1691
1692
1693
1694
1695
1696
  }
  EXPORT_SYMBOL(vm_unmap_ram);
  
  /**
   * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
   * @pages: an array of pointers to the pages to be mapped
   * @count: number of pages
   * @node: prefer to allocate data structures on this node
   * @prot: memory protection to use. PAGE_KERNEL for regular RAM
   *
   * For fewer than VMAP_MAX_ALLOC pages this can be faster than vmap, which
   * makes it a good choice there.  Mixing long-lived and short-lived objects
   * in vm_map_ram() can however eat a lot of address space through
   * fragmentation (especially on a 32bit machine) and may end in failures.
   * Please use this function for short-lived objects.
   *
   * Returns: a pointer to the address that has been mapped, or %NULL on failure
   */
  void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
  {
  	const unsigned long nbytes = (unsigned long)count << PAGE_SHIFT;
  	unsigned long addr;
  	void *mem;
  
  	if (unlikely(count > VMAP_MAX_ALLOC)) {
  		/* Too large for a vmap block: take a vmap area of its own. */
  		struct vmap_area *area;
  
  		area = alloc_vmap_area(nbytes, PAGE_SIZE,
  				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
  		if (IS_ERR(area))
  			return NULL;
  
  		addr = area->va_start;
  		mem = (void *)addr;
  	} else {
  		mem = vb_alloc(nbytes, GFP_KERNEL);
  		if (IS_ERR(mem))
  			return NULL;
  
  		addr = (unsigned long)mem;
  	}
  
  	if (vmap_page_range(addr, addr + nbytes, prot, pages) >= 0)
  		return mem;
  
  	/* Mapping failed: give the address range back. */
  	vm_unmap_ram(mem, count);
  	return NULL;
  }
  EXPORT_SYMBOL(vm_map_ram);
4341fa454   Joonsoo Kim   mm, vmalloc: remo...
1734
  /*
   * Boot-time list of vm_structs.  vm_area_add_early() inserts entries in
   * ascending address order and vmalloc_init() walks the list to import them.
   */
  static struct vm_struct *vmlist __initdata;
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
1735

f0aa66179   Tejun Heo   vmalloc: implemen...
1736
  /**
   * vm_area_add_early - add vmap area early during boot
   * @vm: vm_struct to add
   *
   * Adds a fixed kernel vm area to vmlist before vmalloc_init() has been
   * called.  @vm->addr, @vm->size, and @vm->flags should contain proper
   * values and the other fields should be zero.  The list is kept in
   * ascending address order and an overlap with an existing entry is a BUG.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
  void __init vm_area_add_early(struct vm_struct *vm)
  {
  	struct vm_struct **link = &vmlist;
  	struct vm_struct *cur;
  
  	BUG_ON(vmap_initialized);
  
  	while ((cur = *link) != NULL) {
  		if (cur->addr >= vm->addr) {
  			/* Insert before @cur; @vm must end at or below it. */
  			BUG_ON(cur->addr < vm->addr + vm->size);
  			break;
  		}
  
  		/* @cur lies below @vm and must not reach into it. */
  		BUG_ON(cur->addr + cur->size > vm->addr);
  		link = &cur->next;
  	}
  
  	vm->next = *link;
  	*link = vm;
  }
  
  /**
f0aa66179   Tejun Heo   vmalloc: implemen...
1763
1764
   * vm_area_register_early - register vmap area early during boot
   * @vm: vm_struct to register
c0c0a2937   Tejun Heo   vmalloc: add @ali...
1765
   * @align: requested alignment
f0aa66179   Tejun Heo   vmalloc: implemen...
1766
1767
1768
1769
1770
1771
1772
1773
   *
   * This function is used to register kernel vm area before
   * vmalloc_init() is called.  @vm->size and @vm->flags should contain
   * proper values on entry and other fields should be zero.  On return,
   * vm->addr contains the allocated address.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
c0c0a2937   Tejun Heo   vmalloc: add @ali...
1774
  void __init vm_area_register_early(struct vm_struct *vm, size_t align)
f0aa66179   Tejun Heo   vmalloc: implemen...
1775
1776
  {
  	static size_t vm_init_off __initdata;
c0c0a2937   Tejun Heo   vmalloc: add @ali...
1777
1778
1779
1780
  	unsigned long addr;
  
  	addr = ALIGN(VMALLOC_START + vm_init_off, align);
  	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
f0aa66179   Tejun Heo   vmalloc: implemen...
1781

c0c0a2937   Tejun Heo   vmalloc: add @ali...
1782
  	vm->addr = (void *)addr;
f0aa66179   Tejun Heo   vmalloc: implemen...
1783

be9b7335e   Nicolas Pitre   mm: add vm_area_a...
1784
  	vm_area_add_early(vm);
f0aa66179   Tejun Heo   vmalloc: implemen...
1785
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
  /*
   * Populate the free vmap area tree/list with every gap between 1 and
   * ULONG_MAX that is not covered by an entry of vmap_area_list.
   */
  static void vmap_init_free_space(void)
  {
  	const unsigned long space_end = ULONG_MAX;
  	unsigned long cursor = 1;
  	struct vmap_area *busy;
  	struct vmap_area *gap;
  
  	/*
  	 *     B     F     B     B     B     F
  	 * -|-----|.....|-----|-----|-----|.....|-
  	 *  |           The KVA space           |
  	 *  |<--------------------------------->|
  	 */
  	list_for_each_entry(busy, &vmap_area_list, list) {
  		/* Free space between the previous busy area and this one? */
  		if (busy->va_start - cursor > 0) {
  			gap = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!WARN_ON_ONCE(!gap)) {
  				gap->va_start = cursor;
  				gap->va_end = busy->va_start;
  
  				insert_vmap_area_augment(gap, NULL,
  							 &free_vmap_area_root,
  							 &free_vmap_area_list);
  			}
  		}
  
  		cursor = busy->va_end;
  	}
  
  	/* Whatever is left after the last busy area is free as well. */
  	if (space_end - cursor > 0) {
  		gap = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (!WARN_ON_ONCE(!gap)) {
  			gap->va_start = cursor;
  			gap->va_end = space_end;
  
  			insert_vmap_area_augment(gap, NULL,
  						 &free_vmap_area_root,
  						 &free_vmap_area_list);
  		}
  	}
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1826
1827
  /*
   * Set up the vmalloc layer: create the vmap_area cache, initialise the
   * per-CPU block queues and deferred-free state, import the areas that
   * were registered on vmlist, build the free space and finally set
   * vmap_initialized.
   */
  void __init vmalloc_init(void)
  {
  	struct vm_struct *early;
  	struct vmap_area *va;
  	int cpu;
  
  	/*
  	 * Create the cache for vmap_area objects.
  	 */
  	vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
  
  	for_each_possible_cpu(cpu) {
  		struct vmap_block_queue *queue = &per_cpu(vmap_block_queue, cpu);
  		struct vfree_deferred *deferred;
  
  		spin_lock_init(&queue->lock);
  		INIT_LIST_HEAD(&queue->free);
  
  		deferred = &per_cpu(vfree_deferred, cpu);
  		init_llist_head(&deferred->list);
  		INIT_WORK(&deferred->wq, free_work);
  	}
  
  	/* Import existing vmlist entries. */
  	for (early = vmlist; early; early = early->next) {
  		va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (WARN_ON_ONCE(!va))
  			continue;
  
  		va->va_start = (unsigned long)early->addr;
  		va->va_end = va->va_start + early->size;
  		va->vm = early;
  		insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
  	}
  
  	/*
  	 * Now we can initialize a free vmap space.
  	 */
  	vmap_init_free_space();
  	vmap_initialized = true;
  }
8fc489850   Tejun Heo   vmalloc: add un/m...
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
  /**
   * map_kernel_range_noflush - map kernel VM area with the specified pages
   * @addr: start of the VM area to map
   * @size: size of the VM area to map
   * @prot: page protection flags to use
   * @pages: pages to map
   *
   * Maps PFN_UP(@size) pages at @addr.  The VM area described by @addr and
   * @size should have been allocated using get_vm_area() and its friends.
   *
   * NOTE:
   * No cache flushing is done here.  The caller is responsible for calling
   * flush_cache_vmap() on to-be-mapped areas before calling this function.
   *
   * RETURNS:
   * The number of pages mapped on success, -errno on failure.
   */
  int map_kernel_range_noflush(unsigned long addr, unsigned long size,
  			     pgprot_t prot, struct page **pages)
  {
  	const unsigned long end = addr + size;
  
  	return vmap_page_range_noflush(addr, end, prot, pages);
  }
  
  /**
   * unmap_kernel_range_noflush - unmap kernel VM area
   * @addr: start of the VM area to unmap
   * @size: size of the VM area to unmap
   *
   * Unmaps PFN_UP(@size) pages at @addr.  The VM area described by @addr and
   * @size should have been allocated using get_vm_area() and its friends.
   *
   * NOTE:
   * No cache flushing is done here.  The caller is responsible for calling
   * flush_cache_vunmap() on to-be-unmapped areas before calling this
   * function and flush_tlb_kernel_range() after.
   */
  void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
  {
  	const unsigned long end = addr + size;
  
  	vunmap_page_range(addr, end);
  }
81e88fdc4   Huang Ying   ACPI, APEI, Gener...
1907
  EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
8fc489850   Tejun Heo   vmalloc: add un/m...
1908
1909
1910
1911
1912
1913
1914
1915
1916
  
  /**
   * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
   * @addr: start of the VM area to unmap
   * @size: size of the VM area to unmap
   *
   * Like unmap_kernel_range_noflush(), except that the vcache is flushed
   * before the unmapping and the TLB after it.
   */
  void unmap_kernel_range(unsigned long addr, unsigned long size)
  {
  	const unsigned long stop = addr + size;
  
  	flush_cache_vunmap(addr, stop);
  	vunmap_page_range(addr, stop);
  	flush_tlb_kernel_range(addr, stop);
  }
93ef6d6ca   Minchan Kim   mm/vmalloc.c: exp...
1925
  EXPORT_SYMBOL_GPL(unmap_kernel_range);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1926

f6f8ed473   WANG Chao   mm/vmalloc.c: cle...
1927
  /*
   * Map @pages with protection @prot over the whole of @area, i.e. from
   * area->addr for get_vm_area_size(area) bytes.
   *
   * Returns 0 when vmap_page_range() reports a positive result, otherwise
   * the (zero or negative) value it returned.
   */
  int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
  {
  	const unsigned long start = (unsigned long)area->addr;
  	const unsigned long stop = start + get_vm_area_size(area);
  	int ret = vmap_page_range(start, stop, prot, pages);
  
  	if (ret > 0)
  		return 0;
  
  	return ret;
  }
  EXPORT_SYMBOL_GPL(map_vm_area);
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
1937
  static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
1938
  			      unsigned long flags, const void *caller)
cf88c7900   Tejun Heo   vmalloc: separate...
1939
  {
c69480ade   Joonsoo Kim   mm, vmalloc: prot...
1940
  	spin_lock(&vmap_area_lock);
cf88c7900   Tejun Heo   vmalloc: separate...
1941
1942
1943
1944
  	vm->flags = flags;
  	vm->addr = (void *)va->va_start;
  	vm->size = va->va_end - va->va_start;
  	vm->caller = caller;
db1aecafe   Minchan Kim   mm/vmalloc.c: cha...
1945
  	va->vm = vm;
c69480ade   Joonsoo Kim   mm, vmalloc: prot...
1946
  	spin_unlock(&vmap_area_lock);
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
1947
  }
cf88c7900   Tejun Heo   vmalloc: separate...
1948

20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
1949
  static void clear_vm_uninitialized_flag(struct vm_struct *vm)
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
1950
  {
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
1951
  	/*
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
1952
  	 * Before removing VM_UNINITIALIZED,
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
1953
1954
1955
1956
  	 * we should make sure that vm has proper values.
  	 * Pair with smp_rmb() in show_numa_info().
  	 */
  	smp_wmb();
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
1957
  	vm->flags &= ~VM_UNINITIALIZED;
cf88c7900   Tejun Heo   vmalloc: separate...
1958
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1959
  /*
   * Allocate a vm_struct together with a vmap area of @size bytes (rounded
   * up to whole pages, plus one guard page unless VM_NO_GUARD is set in
   * @flags) inside [@start, @end) and tie the two together.
   *
   * For VM_IOREMAP areas @align is replaced by a power of two derived from
   * the size, with the exponent clamped to [PAGE_SHIFT, IOREMAP_MAX_ORDER].
   *
   * Must not be called from interrupt context.  Returns the new vm_struct,
   * or NULL if the page-aligned size is zero or an allocation fails.
   */
  static struct vm_struct *__get_vm_area_node(unsigned long size,
  		unsigned long align, unsigned long flags, unsigned long start,
  		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
  {
  	struct vm_struct *vm;
  	struct vmap_area *va;
  
  	BUG_ON(in_interrupt());
  
  	size = PAGE_ALIGN(size);
  	if (unlikely(!size))
  		return NULL;
  
  	if (flags & VM_IOREMAP) {
  		int shift = clamp_t(int, get_count_order_long(size),
  				    PAGE_SHIFT, IOREMAP_MAX_ORDER);
  
  		align = 1ul << shift;
  	}
  
  	vm = kzalloc_node(sizeof(*vm), gfp_mask & GFP_RECLAIM_MASK, node);
  	if (unlikely(!vm))
  		return NULL;
  
  	/* Leave room for the guard page unless the caller opted out. */
  	if (!(flags & VM_NO_GUARD))
  		size += PAGE_SIZE;
  
  	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
  	if (IS_ERR(va)) {
  		kfree(vm);
  		return NULL;
  	}
  
  	setup_vmalloc_vm(vm, va, flags, caller);
  	return vm;
  }
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
1990
1991
1992
  struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
  				unsigned long start, unsigned long end)
  {
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
1993
1994
  	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
  				  GFP_KERNEL, __builtin_return_address(0));
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
1995
  }
5992b6dac   Rusty Russell   lguest: export sy...
1996
  EXPORT_SYMBOL_GPL(__get_vm_area);
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
1997

c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
1998
1999
  struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  				       unsigned long start, unsigned long end,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
2000
  				       const void *caller)
c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
2001
  {
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2002
2003
  	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
  				  GFP_KERNEL, caller);
c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
2004
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2005
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2006
2007
2008
   * get_vm_area - reserve a contiguous kernel virtual area
   * @size:	 size of the area
   * @flags:	 %VM_IOREMAP for I/O mappings or VM_ALLOC
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2009
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2010
2011
2012
   * Search an area of @size in the kernel virtual mapping area,
   * and reserved it for out purposes.  Returns the area descriptor
   * on success or %NULL on failure.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2013
2014
   *
   * Return: the area descriptor on success or %NULL on failure.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2015
2016
2017
   */
  struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
  {
2dca6999e   David Miller   mm, perf_event: M...
2018
  	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2019
2020
  				  NUMA_NO_NODE, GFP_KERNEL,
  				  __builtin_return_address(0));
230169693   Christoph Lameter   vmallocinfo: add ...
2021
2022
2023
  }
  
  struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
2024
  				const void *caller)
230169693   Christoph Lameter   vmallocinfo: add ...
2025
  {
2dca6999e   David Miller   mm, perf_event: M...
2026
  	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2027
  				  NUMA_NO_NODE, GFP_KERNEL, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2028
  }
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2029
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2030
2031
   * find_vm_area - find a continuous kernel virtual area
   * @addr:	  base address
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2032
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2033
2034
2035
   * Search for the kernel VM area starting at @addr, and return it.
   * It is up to the caller to do all required locking to keep the returned
   * pointer valid.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2036
2037
   *
   * Return: pointer to the found area or %NULL on faulure
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2038
2039
   */
  struct vm_struct *find_vm_area(const void *addr)
833423143   Nick Piggin   [PATCH] mm: intro...
2040
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
2041
  	struct vmap_area *va;
833423143   Nick Piggin   [PATCH] mm: intro...
2042

db64fe022   Nick Piggin   mm: rewrite vmap ...
2043
  	va = find_vmap_area((unsigned long)addr);
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2044
2045
  	if (!va)
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2046

688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2047
  	return va->vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2048
  }
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2049
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2050
2051
   * remove_vm_area - find and remove a continuous kernel virtual area
   * @addr:	    base address
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2052
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2053
2054
2055
   * Search for the kernel VM area starting at @addr, and remove it.
   * This function returns the found VM area, but using it is NOT safe
   * on SMP machines, except for its size or flags.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2056
2057
   *
   * Return: pointer to the found area or %NULL on faulure
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2058
   */
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2059
  struct vm_struct *remove_vm_area(const void *addr)
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2060
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
2061
  	struct vmap_area *va;
5803ed292   Christoph Hellwig   mm: mark all call...
2062
  	might_sleep();
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
2063
2064
  	spin_lock(&vmap_area_lock);
  	va = __find_vmap_area((unsigned long)addr);
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2065
  	if (va && va->vm) {
db1aecafe   Minchan Kim   mm/vmalloc.c: cha...
2066
  		struct vm_struct *vm = va->vm;
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2067

c69480ade   Joonsoo Kim   mm, vmalloc: prot...
2068
  		va->vm = NULL;
c69480ade   Joonsoo Kim   mm, vmalloc: prot...
2069
  		spin_unlock(&vmap_area_lock);
a5af5aa8b   Andrey Ryabinin   kasan, module, vm...
2070
  		kasan_free_shadow(vm);
dd32c2799   KAMEZAWA Hiroyuki   vmalloc: unmap vm...
2071
  		free_unmap_vmap_area(va);
dd32c2799   KAMEZAWA Hiroyuki   vmalloc: unmap vm...
2072

db64fe022   Nick Piggin   mm: rewrite vmap ...
2073
2074
  		return vm;
  	}
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
2075
2076
  
  	spin_unlock(&vmap_area_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
2077
  	return NULL;
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2078
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
  /*
   * Apply @set_direct_map to every page of @area for which page_address()
   * returns a non-NULL address.  The callback's return value is ignored.
   */
  static inline void set_area_direct_map(const struct vm_struct *area,
  				       int (*set_direct_map)(struct page *page))
  {
  	int idx;

  	for (idx = 0; idx < area->nr_pages; idx++) {
  		struct page *page = area->pages[idx];

  		if (!page_address(page))
  			continue;

  		set_direct_map(page);
  	}
  }
  
  /* Handle removing and resetting vm mappings related to the vm_struct. */
  static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
  {
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2092
2093
  	unsigned long start = ULONG_MAX, end = 0;
  	int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2094
  	int flush_dmap = 0;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2095
  	int i;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
  	remove_vm_area(area->addr);
  
  	/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
  	if (!flush_reset)
  		return;
  
  	/*
  	 * If not deallocating pages, just do the flush of the VM area and
  	 * return.
  	 */
  	if (!deallocate_pages) {
  		vm_unmap_aliases();
  		return;
  	}
  
  	/*
  	 * If execution gets here, flush the vm mapping and reset the direct
  	 * map. Find the start and end range of the direct mappings to make sure
  	 * the vm_unmap_aliases() flush includes the direct map.
  	 */
  	for (i = 0; i < area->nr_pages; i++) {
8e41f8726   Rick Edgecombe   mm/vmalloc: Fix c...
2117
2118
  		unsigned long addr = (unsigned long)page_address(area->pages[i]);
  		if (addr) {
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2119
  			start = min(addr, start);
8e41f8726   Rick Edgecombe   mm/vmalloc: Fix c...
2120
  			end = max(addr + PAGE_SIZE, end);
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2121
  			flush_dmap = 1;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2122
2123
2124
2125
2126
2127
2128
2129
2130
  		}
  	}
  
  	/*
  	 * Set direct map to something invalid so that it won't be cached if
  	 * there are any accesses after the TLB flush, then flush the TLB and
  	 * reset the direct map permissions to the default.
  	 */
  	set_area_direct_map(area, set_direct_map_invalid_noflush);
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2131
  	_vm_unmap_aliases(start, end, flush_dmap);
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2132
2133
  	set_area_direct_map(area, set_direct_map_default_noflush);
  }
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2134
  static void __vunmap(const void *addr, int deallocate_pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2135
2136
2137
2138
2139
  {
  	struct vm_struct *area;
  
  	if (!addr)
  		return;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2140
2141
  	if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)
  ",
ab15d9b4c   Dan Carpenter   mm/vmalloc.c: unb...
2142
  			addr))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2143
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2144

6ade20327   Liviu Dudau   mm/vmalloc.c: don...
2145
  	area = find_vm_area(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2146
  	if (unlikely(!area)) {
4c8573e25   Arjan van de Ven   Use WARN() in mm/...
2147
2148
  		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2149
  				addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2150
2151
  		return;
  	}
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
2152
2153
  	debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
  	debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
9a11b49a8   Ingo Molnar   [PATCH] lockdep: ...
2154

868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2155
  	vm_remove_mappings(area, deallocate_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2156
2157
2158
2159
  	if (deallocate_pages) {
  		int i;
  
  		for (i = 0; i < area->nr_pages; i++) {
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2160
2161
2162
  			struct page *page = area->pages[i];
  
  			BUG_ON(!page);
4949148ad   Vladimir Davydov   mm: charge/unchar...
2163
  			__free_pages(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2164
  		}
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2165
  		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2166

244d63ee3   David Rientjes   mm, vmalloc: remo...
2167
  		kvfree(area->pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2168
2169
2170
2171
2172
  	}
  
  	kfree(area);
  	return;
  }
bf22e37a6   Andrey Ryabinin   mm: add vfree_ato...
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
  
  /*
   * Queue @addr on a per-cpu vfree_deferred list and schedule that entry's
   * work item when llist_add() returns non-zero.  The start of the memory
   * at @addr is reused as the struct llist_node.
   */
  static inline void __vfree_deferred(const void *addr)
  {
  	/*
  	 * raw_cpu_ptr() is used because this can be called from preemptible
  	 * context. Preemption is absolutely fine here: llist_add() is
  	 * lockless, so it works even if we end up adding to another cpu's
  	 * list.  schedule_work() should be fine with this too.
  	 */
  	struct vfree_deferred *deferred = raw_cpu_ptr(&vfree_deferred);
  	struct llist_node *node = (struct llist_node *)addr;

  	if (!llist_add(node, &deferred->list))
  		return;

  	schedule_work(&deferred->wq);
  }
  
  /**
   * vfree_atomic - release memory allocated by vmalloc()
   * @addr:	  memory base address
   *
   * Behaves like vfree(), but the release is always deferred through
   * __vfree_deferred(), so it can be called in any atomic context except
   * NMIs.  A %NULL @addr is ignored.
   */
  void vfree_atomic(const void *addr)
  {
  	BUG_ON(in_nmi());

  	kmemleak_free(addr);

  	if (addr)
  		__vfree_deferred(addr);
  }
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2205
2206
2207
2208
2209
2210
2211
  /*
   * Free @addr together with its pages: deferred via __vfree_deferred() when
   * running in interrupt context, directly via __vunmap() otherwise.
   */
  static void __vfree(const void *addr)
  {
  	if (unlikely(in_interrupt())) {
  		__vfree_deferred(addr);
  		return;
  	}

  	__vunmap(addr, 1);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2212
  /**
   * vfree - release memory allocated by vmalloc()
   * @addr:  memory base address
   *
   * Frees the virtually continuous memory area starting at @addr, as
   * obtained from vmalloc(), vmalloc_32() or __vmalloc().  Nothing is done
   * when @addr is %NULL.
   *
   * Must not be called in NMI context (strictly speaking, only if we don't
   * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
   * conventions for vfree() arch-dependent would be a really bad idea).
   *
   * May sleep if called *not* from interrupt context.
   *
   * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node)
   */
  void vfree(const void *addr)
  {
  	BUG_ON(in_nmi());

  	kmemleak_free(addr);

  	might_sleep_if(!in_interrupt());

  	if (addr)
  		__vfree(addr);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2239
2240
2241
  EXPORT_SYMBOL(vfree);
  
  /**
   * vunmap - release virtual mapping obtained by vmap()
   * @addr:   memory base address
   *
   * Frees the virtually contiguous memory area starting at @addr, which was
   * created from the page array passed to vmap().  The pages themselves are
   * not freed.  A %NULL @addr is ignored.
   *
   * Must not be called in interrupt context.
   */
  void vunmap(const void *addr)
  {
  	BUG_ON(in_interrupt());
  	might_sleep();

  	if (!addr)
  		return;

  	__vunmap(addr, 0);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2257
2258
2259
  EXPORT_SYMBOL(vunmap);
  
  /**
   * vmap - map an array of pages into virtually contiguous space
   * @pages: array of page pointers
   * @count: number of pages to map
   * @flags: vm_area->flags
   * @prot: page protection for the mapping
   *
   * Maps @count pages from @pages into contiguous kernel virtual space.
   * Requests for more pages than totalram_pages() are refused.
   *
   * Return: the address of the area or %NULL on failure
   */
  void *vmap(struct page **pages, unsigned int count,
  	   unsigned long flags, pgprot_t prot)
  {
  	struct vm_struct *area;
  	unsigned long nbytes;

  	might_sleep();

  	if (count > totalram_pages())
  		return NULL;

  	/* Widen before shifting so the byte count cannot wrap in 32 bits. */
  	nbytes = (unsigned long)count << PAGE_SHIFT;

  	area = get_vm_area_caller(nbytes, flags, __builtin_return_address(0));
  	if (!area)
  		return NULL;

  	if (!map_vm_area(area, prot, pages))
  		return area->addr;

  	/* Mapping failed: give the reserved area back. */
  	vunmap(area->addr);
  	return NULL;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2292
  EXPORT_SYMBOL(vmap);
8594a21cf   Michal Hocko   mm, vmalloc: fix ...
2293
2294
2295
  static void *__vmalloc_node(unsigned long size, unsigned long align,
  			    gfp_t gfp_mask, pgprot_t prot,
  			    int node, const void *caller);
e31d9eb5c   Adrian Bunk   make __vmalloc_ar...
2296
  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
3722e13cf   Wanpeng Li   mm/vmalloc: don't...
2297
  				 pgprot_t prot, int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2298
2299
2300
  {
  	struct page **pages;
  	unsigned int nr_pages, array_size, i;
930f036b4   David Rientjes   mm, vmalloc: cons...
2301
  	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
704b862f9   Laura Abbott   mm/vmalloc.c: don...
2302
2303
2304
2305
  	const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
  	const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
  					0 :
  					__GFP_HIGHMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2306

762216ab4   Wanpeng Li   mm/vmalloc: use w...
2307
  	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2308
  	array_size = (nr_pages * sizeof(struct page *));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2309
  	/* Please note that the recursion is strictly bounded. */
8757d5fa6   Jan Kiszka   [PATCH] mm: fix o...
2310
  	if (array_size > PAGE_SIZE) {
704b862f9   Laura Abbott   mm/vmalloc.c: don...
2311
  		pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
3722e13cf   Wanpeng Li   mm/vmalloc: don't...
2312
  				PAGE_KERNEL, node, area->caller);
286e1ea3a   Andrew Morton   [PATCH] vmalloc()...
2313
  	} else {
976d6dfbb   Jan Beulich   vmalloc(): adjust...
2314
  		pages = kmalloc_node(array_size, nested_gfp, node);
286e1ea3a   Andrew Morton   [PATCH] vmalloc()...
2315
  	}
7ea362427   Austin Kim   mm/vmalloc.c: mov...
2316
2317
  
  	if (!pages) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2318
2319
2320
2321
  		remove_vm_area(area->addr);
  		kfree(area);
  		return NULL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2322

7ea362427   Austin Kim   mm/vmalloc.c: mov...
2323
2324
  	area->pages = pages;
  	area->nr_pages = nr_pages;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2325
  	for (i = 0; i < area->nr_pages; i++) {
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2326
  		struct page *page;
4b90951c0   Jianguo Wu   mm/vmalloc: use N...
2327
  		if (node == NUMA_NO_NODE)
704b862f9   Laura Abbott   mm/vmalloc.c: don...
2328
  			page = alloc_page(alloc_mask|highmem_mask);
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2329
  		else
704b862f9   Laura Abbott   mm/vmalloc.c: don...
2330
  			page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2331
2332
  
  		if (unlikely(!page)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2333
2334
  			/* Successfully allocated i pages, free them in __vunmap() */
  			area->nr_pages = i;
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2335
  			atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2336
2337
  			goto fail;
  		}
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2338
  		area->pages[i] = page;
704b862f9   Laura Abbott   mm/vmalloc.c: don...
2339
  		if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
660654f90   Eric Dumazet   mm/vmalloc.c: add...
2340
  			cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2341
  	}
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2342
  	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2343

f6f8ed473   WANG Chao   mm/vmalloc.c: cle...
2344
  	if (map_vm_area(area, prot, pages))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2345
2346
2347
2348
  		goto fail;
  	return area->addr;
  
  fail:
a8e99259e   Michal Hocko   mm, page_alloc: w...
2349
  	warn_alloc(gfp_mask, NULL,
7877cdcc3   Michal Hocko   mm: consolidate w...
2350
  			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
22943ab11   Dave Hansen   mm: print vmalloc...
2351
  			  (area->nr_pages*PAGE_SIZE), area->size);
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2352
  	__vfree(area->addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2353
2354
2355
2356
  	return NULL;
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
   * __vmalloc_node_range - allocate virtually contiguous memory
   * @size:		  allocation size
   * @align:		  desired alignment
   * @start:		  vm area range start
   * @end:		  vm area range end
   * @gfp_mask:		  flags for the page level allocator
   * @prot:		  protection mask for the allocated pages
   * @vm_flags:		  additional vm area flags (e.g. %VM_NO_GUARD)
   * @node:		  node to use for allocation or NUMA_NO_NODE
   * @caller:		  caller's return address
   *
   * Allocate enough pages to cover @size from the page level
   * allocator with @gfp_mask flags.  Map them into contiguous
   * kernel virtual space, using a pagetable protection of @prot.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2371
2372
   *
   * Return: the address of the area or %NULL on failure
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2373
   */
d0a21265d   David Rientjes   mm: unify module_...
2374
2375
  void *__vmalloc_node_range(unsigned long size, unsigned long align,
  			unsigned long start, unsigned long end, gfp_t gfp_mask,
cb9e3c292   Andrey Ryabinin   mm: vmalloc: pass...
2376
2377
  			pgprot_t prot, unsigned long vm_flags, int node,
  			const void *caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2378
2379
  {
  	struct vm_struct *area;
89219d37a   Catalin Marinas   kmemleak: Add the...
2380
2381
  	void *addr;
  	unsigned long real_size = size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2382
2383
  
  	size = PAGE_ALIGN(size);
ca79b0c21   Arun KS   mm: convert total...
2384
  	if (!size || (size >> PAGE_SHIFT) > totalram_pages())
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2385
  		goto fail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2386

cb9e3c292   Andrey Ryabinin   mm: vmalloc: pass...
2387
2388
  	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
  				vm_flags, start, end, node, gfp_mask, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2389
  	if (!area)
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2390
  		goto fail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2391

3722e13cf   Wanpeng Li   mm/vmalloc: don't...
2392
  	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
1368edf06   Mel Gorman   mm: vmalloc: chec...
2393
  	if (!addr)
b82225f3f   Wanpeng Li   revert mm/vmalloc...
2394
  		return NULL;
89219d37a   Catalin Marinas   kmemleak: Add the...
2395
2396
  
  	/*
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
2397
2398
  	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
  	 * flag. It means that vm_struct is not fully initialized.
4341fa454   Joonsoo Kim   mm, vmalloc: remo...
2399
  	 * Now, it is fully initialized, so remove this flag here.
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2400
  	 */
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
2401
  	clear_vm_uninitialized_flag(area);
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2402

94f4a1618   Catalin Marinas   mm: kmemleak: tre...
2403
  	kmemleak_vmalloc(area, size, gfp_mask);
89219d37a   Catalin Marinas   kmemleak: Add the...
2404
2405
  
  	return addr;
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2406
2407
  
  fail:
a8e99259e   Michal Hocko   mm, page_alloc: w...
2408
  	warn_alloc(gfp_mask, NULL,
7877cdcc3   Michal Hocko   mm: consolidate w...
2409
  			  "vmalloc: allocation failure: %lu bytes", real_size);
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2410
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2411
  }
153178edc   Uladzislau Rezki (Sony)   vmalloc: export _...
2412
2413
2414
2415
2416
2417
2418
2419
  /*
   * This is only for performance analysis of vmalloc and stress purpose.
   * It is required by vmalloc test module, therefore do not use it other
   * than that.
   */
  #ifdef CONFIG_TEST_VMALLOC_MODULE
  EXPORT_SYMBOL_GPL(__vmalloc_node_range);
  #endif
d0a21265d   David Rientjes   mm: unify module_...
2420
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2421
2422
2423
2424
2425
2426
2427
   * __vmalloc_node - allocate virtually contiguous memory
   * @size:	    allocation size
   * @align:	    desired alignment
   * @gfp_mask:	    flags for the page level allocator
   * @prot:	    protection mask for the allocated pages
   * @node:	    node to use for allocation or NUMA_NO_NODE
   * @caller:	    caller's return address
a7c3e901a   Michal Hocko   mm: introduce kv[...
2428
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2429
2430
2431
   * Allocate enough pages to cover @size from the page level
   * allocator with @gfp_mask flags.  Map them into contiguous
   * kernel virtual space, using a pagetable protection of @prot.
a7c3e901a   Michal Hocko   mm: introduce kv[...
2432
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2433
2434
   * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
   * and __GFP_NOFAIL are not supported
a7c3e901a   Michal Hocko   mm: introduce kv[...
2435
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2436
2437
   * Any use of gfp flags outside of GFP_KERNEL should be consulted
   * with mm people.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2438
2439
   *
   * Return: pointer to the allocated memory or %NULL on error
d0a21265d   David Rientjes   mm: unify module_...
2440
   */
8594a21cf   Michal Hocko   mm, vmalloc: fix ...
2441
  static void *__vmalloc_node(unsigned long size, unsigned long align,
d0a21265d   David Rientjes   mm: unify module_...
2442
  			    gfp_t gfp_mask, pgprot_t prot,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
2443
  			    int node, const void *caller)
d0a21265d   David Rientjes   mm: unify module_...
2444
2445
  {
  	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
cb9e3c292   Andrey Ryabinin   mm: vmalloc: pass...
2446
  				gfp_mask, prot, 0, node, caller);
d0a21265d   David Rientjes   mm: unify module_...
2447
  }
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2448
2449
  /*
   * Allocate @size bytes of virtually contiguous memory with caller-chosen
   * @gfp_mask and @prot, on no particular node, recording this function's
   * return address as the caller.
   */
  void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
  {
  	const void *caller = __builtin_return_address(0);

  	return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE, caller);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2453
  EXPORT_SYMBOL(__vmalloc);
8594a21cf   Michal Hocko   mm, vmalloc: fix ...
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
  /*
   * Allocate @size bytes on @node with gfp @flags and PAGE_KERNEL protection,
   * passing __builtin_return_address(0) as the caller.
   */
  static inline void *__vmalloc_node_flags(unsigned long size,
  					int node, gfp_t flags)
  {
  	const void *caller = __builtin_return_address(0);

  	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
  }
  
  
  /*
   * Same allocation as __vmalloc_node_flags(), except that the recorded
   * caller address is supplied explicitly through @caller.
   */
  void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
  				  void *caller)
  {
  	void *mem;

  	mem = __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);

  	return mem;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2467
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2468
2469
2470
2471
2472
   * vmalloc - allocate virtually contiguous memory
   * @size:    allocation size
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2473
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2474
2475
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2476
2477
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2478
2479
2480
   */
  void *vmalloc(unsigned long size)
  {
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2481
  	return __vmalloc_node_flags(size, NUMA_NO_NODE,
19809c2da   Michal Hocko   mm, vmalloc: use ...
2482
  				    GFP_KERNEL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2483
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2484
  EXPORT_SYMBOL(vmalloc);
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2485
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2486
2487
2488
2489
2490
2491
2492
2493
2494
   * vzalloc - allocate virtually contiguous memory with zero fill
   * @size:    allocation size
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   * The memory allocated is set to zero.
   *
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2495
2496
   *
   * Return: pointer to the allocated memory or %NULL on error
e1ca7788d   Dave Young   mm: add vzalloc()...
2497
2498
2499
   */
  void *vzalloc(unsigned long size)
  {
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2500
  	return __vmalloc_node_flags(size, NUMA_NO_NODE,
19809c2da   Michal Hocko   mm, vmalloc: use ...
2501
  				GFP_KERNEL | __GFP_ZERO);
e1ca7788d   Dave Young   mm: add vzalloc()...
2502
2503
2504
2505
  }
  EXPORT_SYMBOL(vzalloc);
  
  /**
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2506
2507
   * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
   * @size: allocation size
833423143   Nick Piggin   [PATCH] mm: intro...
2508
   *
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2509
2510
   * The resulting memory area is zeroed so it can be mapped to userspace
   * without leaking data.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2511
2512
   *
   * Return: pointer to the allocated memory or %NULL on error
833423143   Nick Piggin   [PATCH] mm: intro...
2513
2514
2515
   */
  void *vmalloc_user(unsigned long size)
  {
bc84c5352   Roman Penyaev   mm/vmalloc: pass ...
2516
2517
2518
2519
  	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
  				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
  				    VM_USERMAP, NUMA_NO_NODE,
  				    __builtin_return_address(0));
833423143   Nick Piggin   [PATCH] mm: intro...
2520
2521
2522
2523
  }
  EXPORT_SYMBOL(vmalloc_user);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2524
2525
2526
   * vmalloc_node - allocate memory on a specific node
   * @size:	  allocation size
   * @node:	  numa node
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2527
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2528
2529
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2530
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2531
2532
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2533
2534
   *
   * Return: pointer to the allocated memory or %NULL on error
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2535
2536
2537
   */
  void *vmalloc_node(unsigned long size, int node)
  {
19809c2da   Michal Hocko   mm, vmalloc: use ...
2538
  	return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
230169693   Christoph Lameter   vmallocinfo: add ...
2539
  					node, __builtin_return_address(0));
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2540
2541
  }
  EXPORT_SYMBOL(vmalloc_node);
e1ca7788d   Dave Young   mm: add vzalloc()...
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
  /**
   * vzalloc_node - allocate memory on a specific node with zero fill
   * @size:	allocation size
   * @node:	numa node
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   * The memory allocated is set to zero.
   *
   * For tight control over page level allocator and protection flags
   * use __vmalloc_node() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2553
2554
   *
   * Return: pointer to the allocated memory or %NULL on error
e1ca7788d   Dave Young   mm: add vzalloc()...
2555
2556
2557
2558
   */
  void *vzalloc_node(unsigned long size, int node)
  {
  	return __vmalloc_node_flags(size, node,
19809c2da   Michal Hocko   mm, vmalloc: use ...
2559
  			 GFP_KERNEL | __GFP_ZERO);
e1ca7788d   Dave Young   mm: add vzalloc()...
2560
2561
  }
  EXPORT_SYMBOL(vzalloc_node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2562
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2563
2564
   * vmalloc_exec - allocate virtually contiguous, executable memory
   * @size:	  allocation size
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2565
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2566
2567
2568
   * Kernel-internal function to allocate enough pages to cover @size
   * the page level allocator and map them into contiguous and
   * executable kernel virtual space.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2569
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2570
2571
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2572
2573
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2574
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2575
2576
  void *vmalloc_exec(unsigned long size)
  {
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2577
2578
2579
  	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
  			GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
  			NUMA_NO_NODE, __builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2580
  }
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
2581
  #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
698d0831b   Michal Hocko   vmalloc: fix __GF...
2582
  #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
2583
  #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
698d0831b   Michal Hocko   vmalloc: fix __GF...
2584
  #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
2585
  #else
698d0831b   Michal Hocko   vmalloc: fix __GF...
2586
2587
2588
2589
2590
  /*
   * 64b systems should always have either DMA or DMA32 zones. For others
   * GFP_DMA32 should do the right thing and use the normal zone.
   *
   * The replacement list is parenthesized, like the other GFP_VMALLOC32
   * definitions in this conditional, so that the macro behaves as a single
   * operand next to operators that bind tighter than '|' (e.g. '&').
   */
  #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
2591
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2592
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2593
2594
   * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
   * @size:	allocation size
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2595
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2596
2597
   * Allocate enough 32bit PA addressable pages to cover @size from the
   * page level allocator and map them into contiguous kernel virtual space.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2598
2599
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2600
2601
2602
   */
  void *vmalloc_32(unsigned long size)
  {
2dca6999e   David Miller   mm, perf_event: M...
2603
  	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2604
  			      NUMA_NO_NODE, __builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2605
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2606
  EXPORT_SYMBOL(vmalloc_32);
833423143   Nick Piggin   [PATCH] mm: intro...
2607
  /**
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2608
   * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2609
   * @size:	     allocation size
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2610
2611
2612
   *
   * The resulting memory area is 32bit addressable and zeroed so it can be
   * mapped to userspace without leaking data.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2613
2614
   *
   * Return: pointer to the allocated memory or %NULL on error
833423143   Nick Piggin   [PATCH] mm: intro...
2615
2616
2617
   */
  void *vmalloc_32_user(unsigned long size)
  {
bc84c5352   Roman Penyaev   mm/vmalloc: pass ...
2618
2619
2620
2621
  	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
  				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
  				    VM_USERMAP, NUMA_NO_NODE,
  				    __builtin_return_address(0));
833423143   Nick Piggin   [PATCH] mm: intro...
2622
2623
  }
  EXPORT_SYMBOL(vmalloc_32_user);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
  /*
   * Small helper routine: copy contents to buf from addr.
   * If the page is not present, fill zero.
   */
  
  static int aligned_vread(char *buf, char *addr, unsigned long count)
  {
  	struct page *p;
  	int copied = 0;
  
  	while (count) {
  		unsigned long offset, length;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
2636
  		offset = offset_in_page(addr);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
  		length = PAGE_SIZE - offset;
  		if (length > count)
  			length = count;
  		p = vmalloc_to_page(addr);
  		/*
  		 * To do safe access to this _mapped_ area, we need
  		 * lock. But adding lock here means that we need to add
  		 * overhead of vmalloc()/vfree() calles for this _debug_
  		 * interface, rarely used. Instead of that, we'll use
  		 * kmap() and get small overhead in this access function.
  		 */
  		if (p) {
  			/*
  			 * we can expect USER0 is not used (see vread/vwrite's
  			 * function description)
  			 */
9b04c5fec   Cong Wang   mm: remove the se...
2653
  			void *map = kmap_atomic(p);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2654
  			memcpy(buf, map + offset, length);
9b04c5fec   Cong Wang   mm: remove the se...
2655
  			kunmap_atomic(map);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
  		} else
  			memset(buf, 0, length);
  
  		addr += length;
  		buf += length;
  		copied += length;
  		count -= length;
  	}
  	return copied;
  }
  
  static int aligned_vwrite(char *buf, char *addr, unsigned long count)
  {
  	struct page *p;
  	int copied = 0;
  
  	while (count) {
  		unsigned long offset, length;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
2674
  		offset = offset_in_page(addr);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
  		length = PAGE_SIZE - offset;
  		if (length > count)
  			length = count;
  		p = vmalloc_to_page(addr);
  		/*
  		 * To do safe access to this _mapped_ area, we need
  		 * lock. But adding lock here means that we need to add
  		 * overhead of vmalloc()/vfree() calles for this _debug_
  		 * interface, rarely used. Instead of that, we'll use
  		 * kmap() and get small overhead in this access function.
  		 */
  		if (p) {
  			/*
  			 * we can expect USER0 is not used (see vread/vwrite's
  			 * function description)
  			 */
9b04c5fec   Cong Wang   mm: remove the se...
2691
  			void *map = kmap_atomic(p);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2692
  			memcpy(map + offset, buf, length);
9b04c5fec   Cong Wang   mm: remove the se...
2693
  			kunmap_atomic(map);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
  		}
  		addr += length;
  		buf += length;
  		copied += length;
  		count -= length;
  	}
  	return copied;
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2704
2705
2706
2707
2708
   * vread() - read vmalloc area in a safe way.
   * @buf:     buffer for reading data
   * @addr:    vm address.
   * @count:   number of bytes to be read.
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
   * This function checks that addr is a valid vmalloc'ed area, and
   * copy data from that area to a given buffer. If the given memory range
   * of [addr...addr+count) includes some valid address, data is copied to
   * proper area of @buf. If there are memory holes, they'll be zero-filled.
   * IOREMAP area is treated as memory hole and no copy is done.
   *
   * If [addr...addr+count) does not intersect any live vm_struct area,
   * returns 0. @buf should be a kernel buffer.
   *
   * Note: In usual ops, vread() is never necessary because the caller
   * should know vmalloc() area is valid and can use memcpy().
   * This is for routines which have to access vmalloc area without
d9009d67f   Geert Uytterhoeven   mm/vmalloc.c: spe...
2721
   * any information, as /dev/kmem.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2722
2723
2724
2725
   *
   * Return: number of bytes for which addr and buf should be increased
   * (same number as @count) or %0 if [addr...addr+count) doesn't
   * include any intersection with valid vmalloc area
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2726
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2727
2728
  long vread(char *buf, char *addr, unsigned long count)
  {
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2729
2730
  	struct vmap_area *va;
  	struct vm_struct *vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2731
  	char *vaddr, *buf_start = buf;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2732
  	unsigned long buflen = count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2733
2734
2735
2736
2737
  	unsigned long n;
  
  	/* Don't allow overflow */
  	if ((unsigned long) addr + count < count)
  		count = -(unsigned long) addr;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2738
2739
2740
2741
  	spin_lock(&vmap_area_lock);
  	list_for_each_entry(va, &vmap_area_list, list) {
  		if (!count)
  			break;
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2742
  		if (!va->vm)
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2743
2744
2745
2746
  			continue;
  
  		vm = va->vm;
  		vaddr = (char *) vm->addr;
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2747
  		if (addr >= vaddr + get_vm_area_size(vm))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2748
2749
2750
2751
2752
2753
2754
2755
2756
  			continue;
  		while (addr < vaddr) {
  			if (count == 0)
  				goto finished;
  			*buf = '\0';
  			buf++;
  			addr++;
  			count--;
  		}
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2757
  		n = vaddr + get_vm_area_size(vm) - addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2758
2759
  		if (n > count)
  			n = count;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2760
  		if (!(vm->flags & VM_IOREMAP))
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2761
2762
2763
2764
2765
2766
  			aligned_vread(buf, addr, n);
  		else /* IOREMAP area is treated as memory hole */
  			memset(buf, 0, n);
  		buf += n;
  		addr += n;
  		count -= n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2767
2768
  	}
  finished:
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2769
  	spin_unlock(&vmap_area_lock);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2770
2771
2772
2773
2774
2775
2776
2777
  
  	if (buf == buf_start)
  		return 0;
  	/* zero-fill memory holes */
  	if (buf != buf_start + buflen)
  		memset(buf, 0, buflen - (buf - buf_start));
  
  	return buflen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2778
  }
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2779
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2780
2781
2782
2783
2784
   * vwrite() - write vmalloc area in a safe way.
   * @buf:      buffer for source data
   * @addr:     vm address.
   * @count:    number of bytes to be written.
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
   * This function checks that addr is a valid vmalloc'ed area, and
   * copy data from a buffer to the given addr. If specified range of
   * [addr...addr+count) includes some valid address, data is copied from
   * proper area of @buf. If there are memory holes, no copy to hole.
   * IOREMAP area is treated as memory hole and no copy is done.
   *
   * If [addr...addr+count) does not intersect any live vm_struct area,
   * returns 0. @buf should be a kernel buffer.
   *
   * Note: In usual ops, vwrite() is never necessary because the caller
   * should know vmalloc() area is valid and can use memcpy().
   * This is for routines which have to access vmalloc area without
d9009d67f   Geert Uytterhoeven   mm/vmalloc.c: spe...
2797
   * any information, as /dev/kmem.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2798
2799
2800
2801
   *
   * Return: number of bytes for which addr and buf should be
   * increased (same number as @count) or %0 if [addr...addr+count)
   * doesn't include any intersection with valid vmalloc area
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2802
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2803
2804
  long vwrite(char *buf, char *addr, unsigned long count)
  {
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2805
2806
  	struct vmap_area *va;
  	struct vm_struct *vm;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2807
2808
2809
  	char *vaddr;
  	unsigned long n, buflen;
  	int copied = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2810
2811
2812
2813
  
  	/* Don't allow overflow */
  	if ((unsigned long) addr + count < count)
  		count = -(unsigned long) addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2814
  	buflen = count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2815

e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2816
2817
2818
2819
  	spin_lock(&vmap_area_lock);
  	list_for_each_entry(va, &vmap_area_list, list) {
  		if (!count)
  			break;
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2820
  		if (!va->vm)
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2821
2822
2823
2824
  			continue;
  
  		vm = va->vm;
  		vaddr = (char *) vm->addr;
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2825
  		if (addr >= vaddr + get_vm_area_size(vm))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2826
2827
2828
2829
2830
2831
2832
2833
  			continue;
  		while (addr < vaddr) {
  			if (count == 0)
  				goto finished;
  			buf++;
  			addr++;
  			count--;
  		}
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2834
  		n = vaddr + get_vm_area_size(vm) - addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2835
2836
  		if (n > count)
  			n = count;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2837
  		if (!(vm->flags & VM_IOREMAP)) {
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2838
2839
2840
2841
2842
2843
  			aligned_vwrite(buf, addr, n);
  			copied++;
  		}
  		buf += n;
  		addr += n;
  		count -= n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2844
2845
  	}
  finished:
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2846
  	spin_unlock(&vmap_area_lock);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2847
2848
2849
  	if (!copied)
  		return 0;
  	return buflen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2850
  }
833423143   Nick Piggin   [PATCH] mm: intro...
2851
2852
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2853
2854
2855
2856
2857
   * remap_vmalloc_range_partial - map vmalloc pages to userspace
   * @vma:		vma to cover
   * @uaddr:		target user address to start at
   * @kaddr:		virtual address of vmalloc kernel memory
   * @size:		size of map area
7682486b3   Randy Dunlap   mm: fix various k...
2858
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2859
   * Returns:	0 for success, -Exxx on failure
833423143   Nick Piggin   [PATCH] mm: intro...
2860
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2861
2862
2863
2864
   * This function checks that @kaddr is a valid vmalloc'ed area,
   * and that it is big enough to cover the range starting at
   * @uaddr in @vma. Will return failure if that criteria isn't
   * met.
833423143   Nick Piggin   [PATCH] mm: intro...
2865
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2866
   * Similar to remap_pfn_range() (see mm/memory.c)
833423143   Nick Piggin   [PATCH] mm: intro...
2867
   */
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2868
2869
  int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
  				void *kaddr, unsigned long size)
833423143   Nick Piggin   [PATCH] mm: intro...
2870
2871
  {
  	struct vm_struct *area;
833423143   Nick Piggin   [PATCH] mm: intro...
2872

e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2873
2874
2875
  	size = PAGE_ALIGN(size);
  
  	if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
833423143   Nick Piggin   [PATCH] mm: intro...
2876
  		return -EINVAL;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2877
  	area = find_vm_area(kaddr);
833423143   Nick Piggin   [PATCH] mm: intro...
2878
  	if (!area)
db64fe022   Nick Piggin   mm: rewrite vmap ...
2879
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
2880

fe9041c24   Christoph Hellwig   vmalloc: lift the...
2881
  	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
db64fe022   Nick Piggin   mm: rewrite vmap ...
2882
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
2883

401592d2e   Roman Penyaev   mm/vmalloc: fix s...
2884
  	if (kaddr + size > area->addr + get_vm_area_size(area))
db64fe022   Nick Piggin   mm: rewrite vmap ...
2885
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
2886

833423143   Nick Piggin   [PATCH] mm: intro...
2887
  	do {
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2888
  		struct page *page = vmalloc_to_page(kaddr);
db64fe022   Nick Piggin   mm: rewrite vmap ...
2889
  		int ret;
833423143   Nick Piggin   [PATCH] mm: intro...
2890
2891
2892
2893
2894
  		ret = vm_insert_page(vma, uaddr, page);
  		if (ret)
  			return ret;
  
  		uaddr += PAGE_SIZE;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2895
2896
2897
  		kaddr += PAGE_SIZE;
  		size -= PAGE_SIZE;
  	} while (size > 0);
833423143   Nick Piggin   [PATCH] mm: intro...
2898

314e51b98   Konstantin Khlebnikov   mm: kill vma flag...
2899
  	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
833423143   Nick Piggin   [PATCH] mm: intro...
2900

db64fe022   Nick Piggin   mm: rewrite vmap ...
2901
  	return 0;
833423143   Nick Piggin   [PATCH] mm: intro...
2902
  }
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2903
2904
2905
  EXPORT_SYMBOL(remap_vmalloc_range_partial);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2906
2907
2908
2909
   * remap_vmalloc_range - map vmalloc pages to userspace
   * @vma:		vma to cover (map full range of vma)
   * @addr:		vmalloc memory
   * @pgoff:		number of pages into addr before first page to map
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2910
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2911
   * Returns:	0 for success, -Exxx on failure
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2912
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2913
2914
2915
   * This function checks that addr is a valid vmalloc'ed area, and
   * that it is big enough to cover the vma. Will return failure if
   * that criteria isn't met.
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2916
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2917
   * Similar to remap_pfn_range() (see mm/memory.c)
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2918
2919
2920
2921
2922
2923
2924
2925
   */
  int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
  						unsigned long pgoff)
  {
  	return remap_vmalloc_range_partial(vma, vma->vm_start,
  					   addr + (pgoff << PAGE_SHIFT),
  					   vma->vm_end - vma->vm_start);
  }
833423143   Nick Piggin   [PATCH] mm: intro...
2926
  EXPORT_SYMBOL(remap_vmalloc_range);
1eeb66a1b   Christoph Hellwig   move die notifier...
2927
2928
2929
  /*
   * Implement a stub for vmalloc_sync_all() if the architecture chose not to
   * have one.
3f8fd02b1   Joerg Roedel   mm/vmalloc: Sync ...
2930
2931
2932
   *
   * The purpose of this function is to make sure the vmalloc area
   * mappings are identical in all page-tables in the system.
1eeb66a1b   Christoph Hellwig   move die notifier...
2933
   */
3b32123d7   Gideon Israel Dsouza   mm: use macros fr...
2934
  void __weak vmalloc_sync_all(void)
  {
  	/* Weak default stub: intentionally does nothing. */
  }
5f4352fbf   Jeremy Fitzhardinge   Allocate and free...
2937

8b1e0f81f   Anshuman Khandual   mm/pgtable: drop ...
2938
  /*
   * Callback handed to apply_to_page_range() by alloc_vm_area().
   * When @data is non-NULL it points at a cursor into an array of PTE
   * pointers: @pte is stored in the current slot and the cursor moves on
   * to the next one. @addr is unused. Always returns 0.
   */
  static int f(pte_t *pte, unsigned long addr, void *data)
  {
  	pte_t ***cursor = data;

  	/* Caller did not ask for the PTEs: nothing to record. */
  	if (!cursor)
  		return 0;

  	/* Store into the current slot, then advance the cursor. */
  	*(*cursor)++ = pte;

  	return 0;
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2950
2951
2952
   * alloc_vm_area - allocate a range of kernel address space
   * @size:	   size of the area
   * @ptes:	   returns the PTEs for the address space
7682486b3   Randy Dunlap   mm: fix various k...
2953
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2954
   * Returns:	NULL on failure, vm_struct on success
5f4352fbf   Jeremy Fitzhardinge   Allocate and free...
2955
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2956
2957
2958
   * This function reserves a range of kernel address space, and
   * allocates pagetables to map that range.  No actual mappings
   * are created.
cd12909cb   David Vrabel   xen: map foreign ...
2959
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2960
2961
   * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
   * allocated for the VM area are returned.
5f4352fbf   Jeremy Fitzhardinge   Allocate and free...
2962
   */
cd12909cb   David Vrabel   xen: map foreign ...
2963
  struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
  {
  	/* Only hand a cursor to the callback when the caller wants PTEs. */
  	void *cb_data = ptes ? &ptes : NULL;
  	struct vm_struct *area;

  	area = get_vm_area_caller(size, VM_IOREMAP,
  				  __builtin_return_address(0));
  	if (!area)
  		return NULL;

  	/*
  	 * This ensures that page tables are constructed for this region
  	 * of kernel virtual address space and mapped into init_mm.
  	 * On failure the freshly reserved area is released again.
  	 */
  	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
  				size, f, cb_data) != 0) {
  		free_vm_area(area);
  		return NULL;
  	}

  	return area;
  }
  EXPORT_SYMBOL_GPL(alloc_vm_area);
  
  /*
   * free_vm_area - release a vm area
   * @area: the area to release
   *
   * Calls remove_vm_area() on @area->addr, BUGs if that does not hand
   * back @area itself, then frees the structure with kfree().
   */
  void free_vm_area(struct vm_struct *area)
  {
  	struct vm_struct *removed = remove_vm_area(area->addr);

  	/* The address must map back to exactly the area we were given. */
  	BUG_ON(removed != area);
  	kfree(area);
  }
  EXPORT_SYMBOL_GPL(free_vm_area);
a10aa5798   Christoph Lameter   vmalloc: show vma...
2992

4f8b02b4e   Tejun Heo   vmalloc: pcpu_get...
2993
  #ifdef CONFIG_SMP
ca23e405e   Tejun Heo   vmalloc: implemen...
2994
2995
  static struct vmap_area *node_to_va(struct rb_node *n)
  {
4583e7731   Geliang Tang   mm/vmalloc.c: use...
2996
  	return rb_entry_safe(n, struct vmap_area, rb_node);
ca23e405e   Tejun Heo   vmalloc: implemen...
2997
2998
2999
  }
  
  /**
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3000
3001
   * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
   * @addr: target address
ca23e405e   Tejun Heo   vmalloc: implemen...
3002
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3003
3004
3005
3006
   * Returns: the vmap_area enclosing @addr if it is found. If there is
   *   no such area, the highest vmap_area below @addr (reverse order),
   *   i.e. va->va_start < addr && va->va_end < addr, is returned, or
   *   NULL if there are no areas before @addr.
ca23e405e   Tejun Heo   vmalloc: implemen...
3007
   */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3008
3009
  static struct vmap_area *
  pvm_find_va_enclose_addr(unsigned long addr)
ca23e405e   Tejun Heo   vmalloc: implemen...
3010
  {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3011
3012
3013
3014
3015
  	struct vmap_area *va, *tmp;
  	struct rb_node *n;
  
  	n = free_vmap_area_root.rb_node;
  	va = NULL;
ca23e405e   Tejun Heo   vmalloc: implemen...
3016
3017
  
  	while (n) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3018
3019
3020
3021
3022
  		tmp = rb_entry(n, struct vmap_area, rb_node);
  		if (tmp->va_start <= addr) {
  			va = tmp;
  			if (tmp->va_end >= addr)
  				break;
ca23e405e   Tejun Heo   vmalloc: implemen...
3023
  			n = n->rb_right;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3024
3025
3026
  		} else {
  			n = n->rb_left;
  		}
ca23e405e   Tejun Heo   vmalloc: implemen...
3027
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3028
  	return va;
ca23e405e   Tejun Heo   vmalloc: implemen...
3029
3030
3031
  }
  
  /**
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3032
3033
3034
3035
3036
   * pvm_determine_end_from_reverse - find the highest aligned address
   * of free block below VMALLOC_END
   * @va:
   *   in - the VA we start the search(reverse order);
   *   out - the VA with the highest aligned end address.
ca23e405e   Tejun Heo   vmalloc: implemen...
3037
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3038
   * Returns: determined end address within vmap_area
ca23e405e   Tejun Heo   vmalloc: implemen...
3039
   */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3040
3041
  static unsigned long
  pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
ca23e405e   Tejun Heo   vmalloc: implemen...
3042
  {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3043
  	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
ca23e405e   Tejun Heo   vmalloc: implemen...
3044
  	unsigned long addr;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3045
3046
3047
3048
3049
3050
3051
  	if (likely(*va)) {
  		list_for_each_entry_from_reverse((*va),
  				&free_vmap_area_list, list) {
  			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
  			if ((*va)->va_start < addr)
  				return addr;
  		}
ca23e405e   Tejun Heo   vmalloc: implemen...
3052
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3053
  	return 0;
ca23e405e   Tejun Heo   vmalloc: implemen...
3054
3055
3056
3057
3058
3059
3060
3061
  }
  
  /**
   * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
   * @offsets: array containing offset of each area
   * @sizes: array containing size of each area
   * @nr_vms: the number of areas to allocate
   * @align: alignment, all entries in @offsets and @sizes must be aligned to this
ca23e405e   Tejun Heo   vmalloc: implemen...
3062
3063
3064
3065
3066
3067
   *
   * Returns: kmalloc'd vm_struct pointer array pointing to allocated
   *	    vm_structs on success, %NULL on failure
   *
   * Percpu allocator wants to use congruent vm areas so that it can
   * maintain the offsets among percpu areas.  This function allocates
ec3f64fc9   David Rientjes   mm: remove gfp ma...
3068
3069
3070
3071
   * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
   * be scattered pretty far, distance between two areas easily going up
   * to gigabytes.  To avoid interacting with regular vmallocs, these
   * areas are allocated from top.
ca23e405e   Tejun Heo   vmalloc: implemen...
3072
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3073
3074
3075
3076
3077
3078
   * Despite its complicated look, this allocator is rather simple. It
   * does everything top-down and scans free blocks from the end looking
   * for matching base. While scanning, if any of the areas do not fit the
   * base address is pulled down to fit the area. Scanning is repeated till
   * all the areas fit and then all necessary data structures are inserted
   * and the result is returned.
ca23e405e   Tejun Heo   vmalloc: implemen...
3079
3080
3081
   */
  struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
  				     const size_t *sizes, int nr_vms,
ec3f64fc9   David Rientjes   mm: remove gfp ma...
3082
  				     size_t align)
ca23e405e   Tejun Heo   vmalloc: implemen...
3083
3084
3085
  {
  	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
  	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3086
  	struct vmap_area **vas, *va;
ca23e405e   Tejun Heo   vmalloc: implemen...
3087
3088
  	struct vm_struct **vms;
  	int area, area2, last_area, term_area;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3089
  	unsigned long base, start, size, end, last_end;
ca23e405e   Tejun Heo   vmalloc: implemen...
3090
  	bool purged = false;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3091
  	enum fit_type type;
ca23e405e   Tejun Heo   vmalloc: implemen...
3092

ca23e405e   Tejun Heo   vmalloc: implemen...
3093
  	/* verify parameters and allocate data structures */
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
3094
  	BUG_ON(offset_in_page(align) || !is_power_of_2(align));
ca23e405e   Tejun Heo   vmalloc: implemen...
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
  	for (last_area = 0, area = 0; area < nr_vms; area++) {
  		start = offsets[area];
  		end = start + sizes[area];
  
  		/* is everything aligned properly? */
  		BUG_ON(!IS_ALIGNED(offsets[area], align));
  		BUG_ON(!IS_ALIGNED(sizes[area], align));
  
  		/* detect the area with the highest address */
  		if (start > offsets[last_area])
  			last_area = area;
c568da282   Wei Yang   mm/vmalloc.c: hal...
3106
  		for (area2 = area + 1; area2 < nr_vms; area2++) {
ca23e405e   Tejun Heo   vmalloc: implemen...
3107
3108
  			unsigned long start2 = offsets[area2];
  			unsigned long end2 = start2 + sizes[area2];
c568da282   Wei Yang   mm/vmalloc.c: hal...
3109
  			BUG_ON(start2 < end && start < end2);
ca23e405e   Tejun Heo   vmalloc: implemen...
3110
3111
3112
3113
3114
3115
3116
3117
  		}
  	}
  	last_end = offsets[last_area] + sizes[last_area];
  
  	if (vmalloc_end - vmalloc_start < last_end) {
  		WARN_ON(true);
  		return NULL;
  	}
4d67d8605   Thomas Meyer   mm: use kcalloc()...
3118
3119
  	vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
  	vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
ca23e405e   Tejun Heo   vmalloc: implemen...
3120
  	if (!vas || !vms)
f1db7afd9   Kautuk Consul   mm/vmalloc.c: eli...
3121
  		goto err_free2;
ca23e405e   Tejun Heo   vmalloc: implemen...
3122
3123
  
  	for (area = 0; area < nr_vms; area++) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3124
  		vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
ec3f64fc9   David Rientjes   mm: remove gfp ma...
3125
  		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
ca23e405e   Tejun Heo   vmalloc: implemen...
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
  		if (!vas[area] || !vms[area])
  			goto err_free;
  	}
  retry:
  	spin_lock(&vmap_area_lock);
  
  	/* start scanning - we scan from the top, begin with the last area */
  	area = term_area = last_area;
  	start = offsets[area];
  	end = start + sizes[area];
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3136
3137
  	va = pvm_find_va_enclose_addr(vmalloc_end);
  	base = pvm_determine_end_from_reverse(&va, align) - end;
ca23e405e   Tejun Heo   vmalloc: implemen...
3138
3139
  
  	while (true) {
ca23e405e   Tejun Heo   vmalloc: implemen...
3140
3141
3142
3143
  		/*
  		 * base might have underflowed, add last_end before
  		 * comparing.
  		 */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3144
3145
  		if (base + last_end < vmalloc_start + last_end)
  			goto overflow;
ca23e405e   Tejun Heo   vmalloc: implemen...
3146
3147
  
  		/*
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3148
  		 * Fitting base has not been found.
ca23e405e   Tejun Heo   vmalloc: implemen...
3149
  		 */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3150
3151
  		if (va == NULL)
  			goto overflow;
ca23e405e   Tejun Heo   vmalloc: implemen...
3152
3153
  
  		/*
5336e52c9   Kuppuswamy Sathyanarayanan   mm/vmalloc.c: fix...
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
  		 * If required width exceeds current VA block, move
  		 * base downwards and then recheck.
  		 */
  		if (base + end > va->va_end) {
  			base = pvm_determine_end_from_reverse(&va, align) - end;
  			term_area = area;
  			continue;
  		}
  
  		/*
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3164
  		 * If this VA does not fit, move base downwards and recheck.
ca23e405e   Tejun Heo   vmalloc: implemen...
3165
  		 */
5336e52c9   Kuppuswamy Sathyanarayanan   mm/vmalloc.c: fix...
3166
  		if (base + start < va->va_start) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3167
3168
  			va = node_to_va(rb_prev(&va->rb_node));
  			base = pvm_determine_end_from_reverse(&va, align) - end;
ca23e405e   Tejun Heo   vmalloc: implemen...
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
  			term_area = area;
  			continue;
  		}
  
  		/*
  		 * This area fits, move on to the previous one.  If
  		 * the previous one is the terminal one, we're done.
  		 */
  		area = (area + nr_vms - 1) % nr_vms;
  		if (area == term_area)
  			break;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3180

ca23e405e   Tejun Heo   vmalloc: implemen...
3181
3182
  		start = offsets[area];
  		end = start + sizes[area];
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3183
  		va = pvm_find_va_enclose_addr(base + end);
ca23e405e   Tejun Heo   vmalloc: implemen...
3184
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3185

ca23e405e   Tejun Heo   vmalloc: implemen...
3186
3187
  	/* we've found a fitting base, insert all va's */
  	for (area = 0; area < nr_vms; area++) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3188
  		int ret;
ca23e405e   Tejun Heo   vmalloc: implemen...
3189

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3190
3191
  		start = base + offsets[area];
  		size = sizes[area];
ca23e405e   Tejun Heo   vmalloc: implemen...
3192

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
  		va = pvm_find_va_enclose_addr(start);
  		if (WARN_ON_ONCE(va == NULL))
  			/* It is a BUG(), but trigger recovery instead. */
  			goto recovery;
  
  		type = classify_va_fit_type(va, start, size);
  		if (WARN_ON_ONCE(type == NOTHING_FIT))
  			/* It is a BUG(), but trigger recovery instead. */
  			goto recovery;
  
  		ret = adjust_va_to_fit_type(va, start, size, type);
  		if (unlikely(ret))
  			goto recovery;
  
  		/* Allocated area. */
  		va = vas[area];
  		va->va_start = start;
  		va->va_end = start + size;
  
  		insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
  	}
ca23e405e   Tejun Heo   vmalloc: implemen...
3214
3215
3216
3217
3218
  
  	spin_unlock(&vmap_area_lock);
  
  	/* insert all vm's */
  	for (area = 0; area < nr_vms; area++)
3645cb4a4   Zhang Yanfei   mm, vmalloc: call...
3219
3220
  		setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
  				 pcpu_get_vm_areas);
ca23e405e   Tejun Heo   vmalloc: implemen...
3221
3222
3223
  
  	kfree(vas);
  	return vms;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
  recovery:
  	/* Remove previously inserted areas. */
  	while (area--) {
  		__free_vmap_area(vas[area]);
  		vas[area] = NULL;
  	}
  
  overflow:
  	spin_unlock(&vmap_area_lock);
  	if (!purged) {
  		purge_vmap_area_lazy();
  		purged = true;
  
  		/* Before "retry", check if we recover. */
  		for (area = 0; area < nr_vms; area++) {
  			if (vas[area])
  				continue;
  
  			vas[area] = kmem_cache_zalloc(
  				vmap_area_cachep, GFP_KERNEL);
  			if (!vas[area])
  				goto err_free;
  		}
  
  		goto retry;
  	}
ca23e405e   Tejun Heo   vmalloc: implemen...
3250
3251
  err_free:
  	for (area = 0; area < nr_vms; area++) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3252
3253
  		if (vas[area])
  			kmem_cache_free(vmap_area_cachep, vas[area]);
f1db7afd9   Kautuk Consul   mm/vmalloc.c: eli...
3254
  		kfree(vms[area]);
ca23e405e   Tejun Heo   vmalloc: implemen...
3255
  	}
f1db7afd9   Kautuk Consul   mm/vmalloc.c: eli...
3256
  err_free2:
ca23e405e   Tejun Heo   vmalloc: implemen...
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
  	kfree(vas);
  	kfree(vms);
  	return NULL;
  }
  
  /**
   * pcpu_free_vm_areas - release the vmalloc areas of the percpu allocator
   * @vms: array of vm_struct pointers obtained from pcpu_get_vm_areas()
   * @nr_vms: number of entries in @vms
   *
   * Every vm_struct in @vms is handed to free_vm_area(), after which the
   * pointer array itself is released with kfree().
   */
  void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
  {
  	int idx = 0;
  
  	/* Release the areas in array order, first to last. */
  	while (idx < nr_vms) {
  		free_vm_area(vms[idx]);
  		idx++;
  	}
  
  	kfree(vms);
  }
4f8b02b4e   Tejun Heo   vmalloc: pcpu_get...
3277
  #endif	/* CONFIG_SMP */
a10aa5798   Christoph Lameter   vmalloc: show vma...
3278
3279
3280
  
  #ifdef CONFIG_PROC_FS
  /*
   * seq_file ->start callback: take vmap_area_lock and position the
   * iterator at offset *pos within vmap_area_list.
   *
   * The lock is still held when this function returns (see the
   * __acquires annotation); s_stop() drops it.
   *
   * Returns whatever seq_list_start() yields for *pos.
   */
  static void *s_start(struct seq_file *m, loff_t *pos)
  	__acquires(&vmap_area_lock)
  {
  	spin_lock(&vmap_area_lock);
  	return seq_list_start(&vmap_area_list, *pos);
  }
  
  /*
   * seq_file ->next callback: step from list node @p to the following
   * node of vmap_area_list, letting seq_list_next() update *pos.
   *
   * No locking is done here; presumably vmap_area_lock is still held
   * from s_start() -- confirm against the seq_file calling sequence.
   */
  static void *s_next(struct seq_file *m, void *p, loff_t *pos)
  {
  	return seq_list_next(p, &vmap_area_list, pos);
  }
  
  static void s_stop(struct seq_file *m, void *p)
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3293
  	__releases(&vmap_area_lock)
a10aa5798   Christoph Lameter   vmalloc: show vma...
3294
  {
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3295
  	spin_unlock(&vmap_area_lock);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3296
  }
a47a126ad   Eric Dumazet   vmallocinfo: add ...
3297
3298
  static void show_numa_info(struct seq_file *m, struct vm_struct *v)
  {
e5adfffc8   Kirill A. Shutemov   mm: use IS_ENABLE...
3299
  	if (IS_ENABLED(CONFIG_NUMA)) {
a47a126ad   Eric Dumazet   vmallocinfo: add ...
3300
3301
3302
3303
  		unsigned int nr, *counters = m->private;
  
  		if (!counters)
  			return;
af12346cd   Wanpeng Li   mm/vmalloc: rever...
3304
3305
  		if (v->flags & VM_UNINITIALIZED)
  			return;
7e5b528b4   Dmitry Vyukov   mm/vmalloc.c: fix...
3306
3307
  		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
  		smp_rmb();
af12346cd   Wanpeng Li   mm/vmalloc: rever...
3308

a47a126ad   Eric Dumazet   vmallocinfo: add ...
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
  		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
  
  		for (nr = 0; nr < v->nr_pages; nr++)
  			counters[page_to_nid(v->pages[nr])]++;
  
  		for_each_node_state(nr, N_HIGH_MEMORY)
  			if (counters[nr])
  				seq_printf(m, " N%u=%u", nr, counters[nr]);
  	}
  }
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
  /*
   * Print one "unpurged vm_area" line to seq_file @m for every vmap_area
   * currently linked on vmap_purge_list.
   *
   * The list head is sampled once with READ_ONCE(); if it is NULL there
   * is nothing to report.  Each line shows the start address, the end
   * address and the size (va_end - va_start) of the area.
   */
  static void show_purge_info(struct seq_file *m)
  {
  	struct llist_node *head;
  	struct vmap_area *va;
  
  	head = READ_ONCE(vmap_purge_list.first);
  	if (head == NULL)
  		return;
  
  	llist_for_each_entry(va, head, purge_list) {
  		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
  			(void *)va->va_start, (void *)va->va_end,
  			va->va_end - va->va_start);
  	}
  }
a10aa5798   Christoph Lameter   vmalloc: show vma...
3335
3336
  static int s_show(struct seq_file *m, void *p)
  {
3f5000693   zijun_hu   mm/vmalloc.c: sim...
3337
  	struct vmap_area *va;
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3338
  	struct vm_struct *v;
3f5000693   zijun_hu   mm/vmalloc.c: sim...
3339
  	va = list_entry(p, struct vmap_area, list);
c2ce8c142   Wanpeng Li   mm/vmalloc: fix s...
3340
  	/*
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
3341
3342
  	 * s_show can encounter race with remove_vm_area, !vm on behalf
  	 * of vmap area is being tear down or vm_map_ram allocation.
c2ce8c142   Wanpeng Li   mm/vmalloc: fix s...
3343
  	 */
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
3344
  	if (!va->vm) {
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3345
3346
  		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram
  ",
78c72746f   Yisheng Xie   vmalloc: show laz...
3347
  			(void *)va->va_start, (void *)va->va_end,
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3348
  			va->va_end - va->va_start);
78c72746f   Yisheng Xie   vmalloc: show laz...
3349

d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3350
  		return 0;
78c72746f   Yisheng Xie   vmalloc: show laz...
3351
  	}
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3352
3353
  
  	v = va->vm;
a10aa5798   Christoph Lameter   vmalloc: show vma...
3354

45ec16908   Kees Cook   mm: use %pK for /...
3355
  	seq_printf(m, "0x%pK-0x%pK %7ld",
a10aa5798   Christoph Lameter   vmalloc: show vma...
3356
  		v->addr, v->addr + v->size, v->size);
62c70bce8   Joe Perches   mm: convert sprin...
3357
3358
  	if (v->caller)
  		seq_printf(m, " %pS", v->caller);
230169693   Christoph Lameter   vmallocinfo: add ...
3359

a10aa5798   Christoph Lameter   vmalloc: show vma...
3360
3361
3362
3363
  	if (v->nr_pages)
  		seq_printf(m, " pages=%d", v->nr_pages);
  
  	if (v->phys_addr)
199eaa05a   Miles Chen   mm: cleanups for ...
3364
  		seq_printf(m, " phys=%pa", &v->phys_addr);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3365
3366
  
  	if (v->flags & VM_IOREMAP)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3367
  		seq_puts(m, " ioremap");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3368
3369
  
  	if (v->flags & VM_ALLOC)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3370
  		seq_puts(m, " vmalloc");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3371
3372
  
  	if (v->flags & VM_MAP)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3373
  		seq_puts(m, " vmap");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3374
3375
  
  	if (v->flags & VM_USERMAP)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3376
  		seq_puts(m, " user");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3377

fe9041c24   Christoph Hellwig   vmalloc: lift the...
3378
3379
  	if (v->flags & VM_DMA_COHERENT)
  		seq_puts(m, " dma-coherent");
244d63ee3   David Rientjes   mm, vmalloc: remo...
3380
  	if (is_vmalloc_addr(v->pages))
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3381
  		seq_puts(m, " vpages");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3382

a47a126ad   Eric Dumazet   vmallocinfo: add ...
3383
  	show_numa_info(m, v);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3384
3385
  	seq_putc(m, '
  ');
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3386
3387
3388
3389
3390
3391
3392
3393
3394
  
  	/*
  	 * As a final step, dump "unpurged" areas. Note,
  	 * that entire "/proc/vmallocinfo" output will not
  	 * be address sorted, because the purge list is not
  	 * sorted.
  	 */
  	if (list_is_last(&va->list, &vmap_area_list))
  		show_purge_info(m);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3395
3396
  	return 0;
  }
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
3397
  static const struct seq_operations vmalloc_op = {
a10aa5798   Christoph Lameter   vmalloc: show vma...
3398
3399
3400
3401
3402
  	.start = s_start,
  	.next = s_next,
  	.stop = s_stop,
  	.show = s_show,
  };
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
3403

5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
3404
3405
  /*
   * Register the "vmallocinfo" proc entry (mode 0400) backed by
   * vmalloc_op.
   *
   * With CONFIG_NUMA the entry is created with a private state size of
   * nr_node_ids * sizeof(unsigned int); show_numa_info() uses that
   * buffer, reached through m->private, as its per-node counters.
   *
   * The result of the proc_create_seq*() call is not checked; the
   * function always returns 0.
   */
  static int __init proc_vmalloc_init(void)
  {
  	if (IS_ENABLED(CONFIG_NUMA))
  		proc_create_seq_private("vmallocinfo", 0400, NULL,
  				&vmalloc_op,
  				nr_node_ids * sizeof(unsigned int), NULL);
  	else
  		proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
  
  	return 0;
  }
  module_init(proc_vmalloc_init);
db3808c1b   Joonsoo Kim   mm, vmalloc: move...
3415

a10aa5798   Christoph Lameter   vmalloc: show vma...
3416
  #endif