Blame view

mm/vmalloc.c 100 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
   *  Copyright (C) 1993  Linus Torvalds
   *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
   *  Numa awareness, Christoph Lameter, SGI, June 2005
d758ffe6b   Uladzislau Rezki (Sony)   mm/vmalloc: updat...
   *  Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
   */
db64fe022   Nick Piggin   mm: rewrite vmap ...
  #include <linux/vmalloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/highmem.h>
c3edc4010   Ingo Molnar   sched/headers: Mo...
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
  #include <linux/proc_fs.h>
a10aa5798   Christoph Lameter   vmalloc: show vma...
  #include <linux/seq_file.h>
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
  #include <linux/set_memory.h>
3ac7fe5a4   Thomas Gleixner   infrastructure to...
  #include <linux/debugobjects.h>
230169693   Christoph Lameter   vmallocinfo: add ...
  #include <linux/kallsyms.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
  #include <linux/list.h>
4da56b99d   Chris Wilson   mm/vmap: Add a no...
  #include <linux/notifier.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
  #include <linux/rbtree.h>
0f14599c6   Matthew Wilcox (Oracle)   vmalloc: convert ...
  #include <linux/xarray.h>
5da96bdd9   Mel Gorman   mm/vmalloc: inclu...
  #include <linux/io.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
  #include <linux/rcupdate.h>
f0aa66179   Tejun Heo   vmalloc: implemen...
  #include <linux/pfn.h>
89219d37a   Catalin Marinas   kmemleak: Add the...
  #include <linux/kmemleak.h>
60063497a   Arun Sharma   atomic: use <linu...
  #include <linux/atomic.h>
3b32123d7   Gideon Israel Dsouza   mm: use macros fr...
  #include <linux/compiler.h>
32fcfd407   Al Viro   make vfree() safe...
  #include <linux/llist.h>
0f616be12   Toshi Kani   mm: change __get_...
  #include <linux/bitops.h>
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  #include <linux/rbtree_augmented.h>
bdebd6a28   Jann Horn   vmalloc: fix rema...
  #include <linux/overflow.h>
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
  #include <linux/pgtable.h>
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
  #include <linux/uaccess.h>
f7ee1f13d   Christophe Leroy   mm/vmalloc: enabl...
  #include <linux/hugetlb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  #include <asm/tlbflush.h>
2dca6999e   David Miller   mm, perf_event: M...
  #include <asm/shmparam.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2

dd56b0464   Mel Gorman   mm: page_alloc: h...
  #include "internal.h"
2a681cfa5   Joerg Roedel   mm: move p?d_allo...
  #include "pgalloc-track.h"
dd56b0464   Mel Gorman   mm: page_alloc: h...

82a70ce04   Christoph Hellwig   mm: move ioremap_...
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
  static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
  
  static int __init set_nohugeiomap(char *str)
  {
  	ioremap_max_page_shift = PAGE_SHIFT;
  	return 0;
  }
  early_param("nohugeiomap", set_nohugeiomap);
  #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
  static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
  #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
  static bool __ro_after_init vmap_allow_huge = true;
  
  static int __init set_nohugevmalloc(char *str)
  {
  	vmap_allow_huge = false;
  	return 0;
  }
  early_param("nohugevmalloc", set_nohugevmalloc);
  #else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
  static const bool vmap_allow_huge = false;
  #endif	/* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
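  /*
   * Editor's note (illustrative, not part of mm/vmalloc.c): the two
   * early_param() hooks above mean that booting with "nohugeiomap" or
   * "nohugevmalloc" on the kernel command line clamps ioremap mappings
   * to PAGE_SIZE and disables huge vmalloc mappings respectively, e.g.:
   *
   *   linux ... nohugeiomap nohugevmalloc
   *
   * On kernels built without CONFIG_HAVE_ARCH_HUGE_VMAP or
   * CONFIG_HAVE_ARCH_HUGE_VMALLOC the corresponding variables are
   * compile-time constants and the parameters have no effect here.
   */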
186525bd6   Ingo Molnar   mm, x86/mm: Untan...
  bool is_vmalloc_addr(const void *x)
  {
  	unsigned long addr = (unsigned long)x;
  
  	return addr >= VMALLOC_START && addr < VMALLOC_END;
  }
  EXPORT_SYMBOL(is_vmalloc_addr);
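  /*
   * Editor's note: an illustrative sketch (not part of this file, helper
   * name is hypothetical) of how callers typically use is_vmalloc_addr()
   * to pick the matching release path for a buffer that may have come
   * from either kmalloc() or vmalloc(); this mirrors what kvfree() does.
   */
  static void example_release_buffer(const void *p)
  {
  	if (is_vmalloc_addr(p))
  		vfree(p);
  	else
  		kfree(p);
  }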
32fcfd407   Al Viro   make vfree() safe...
  struct vfree_deferred {
  	struct llist_head list;
  	struct work_struct wq;
  };
  static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
  
  static void __vunmap(const void *, int);
  
  static void free_work(struct work_struct *w)
  {
  	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
894e58c14   Byungchul Park   mm/vmalloc.c: don...
  	struct llist_node *t, *llnode;
  
  	llist_for_each_safe(llnode, t, llist_del_all(&p->list))
  		__vunmap((void *)llnode, 1);
32fcfd407   Al Viro   make vfree() safe...
92
  }
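  /*
   * Editor's note: a minimal sketch (assumption: roughly how the deferred
   * path is driven when vfree() is called from a context that must not
   * sleep; the helper name is hypothetical). The address is pushed onto
   * the per-CPU llist and the work item above later frees it via
   * free_work()/__vunmap().
   */
  static void example_defer_vfree(const void *addr)
  {
  	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
  
  	if (llist_add((struct llist_node *)addr, &p->list))
  		schedule_work(&p->wq);
  }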
db64fe022   Nick Piggin   mm: rewrite vmap ...
93
  /*** Page table manipulation functions ***/
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
f7ee1f13d   Christophe Leroy   mm/vmalloc: enabl...
96
  			unsigned int max_page_shift, pgtbl_mod_mask *mask)
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  {
  	pte_t *pte;
  	u64 pfn;
f7ee1f13d   Christophe Leroy   mm/vmalloc: enabl...
100
  	unsigned long size = PAGE_SIZE;
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  
  	pfn = phys_addr >> PAGE_SHIFT;
  	pte = pte_alloc_kernel_track(pmd, addr, mask);
  	if (!pte)
  		return -ENOMEM;
  	do {
  		BUG_ON(!pte_none(*pte));
f7ee1f13d   Christophe Leroy   mm/vmalloc: enabl...
  
  #ifdef CONFIG_HUGETLB_PAGE
  		size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
  		if (size != PAGE_SIZE) {
  			pte_t entry = pfn_pte(pfn, prot);
  
  			entry = pte_mkhuge(entry);
  			entry = arch_make_huge_pte(entry, ilog2(size), 0);
  			set_huge_pte_at(&init_mm, addr, pte, entry);
  			pfn += PFN_DOWN(size);
  			continue;
  		}
  #endif
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
  		pfn++;
f7ee1f13d   Christophe Leroy   mm/vmalloc: enabl...
123
  	} while (pte += PFN_DOWN(size), addr += size, addr != end);
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  	*mask |= PGTBL_PTE_MODIFIED;
  	return 0;
  }
  
  static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift)
  {
  	if (max_page_shift < PMD_SHIFT)
  		return 0;
  
  	if (!arch_vmap_pmd_supported(prot))
  		return 0;
  
  	if ((end - addr) != PMD_SIZE)
  		return 0;
  
  	if (!IS_ALIGNED(addr, PMD_SIZE))
  		return 0;
  
  	if (!IS_ALIGNED(phys_addr, PMD_SIZE))
  		return 0;
  
  	if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
  		return 0;
  
  	return pmd_set_huge(pmd, phys_addr, prot);
  }
  
  static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift, pgtbl_mod_mask *mask)
  {
  	pmd_t *pmd;
  	unsigned long next;
  
  	pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
  	if (!pmd)
  		return -ENOMEM;
  	do {
  		next = pmd_addr_end(addr, end);
  
  		if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
  					max_page_shift)) {
  			*mask |= PGTBL_PMD_MODIFIED;
  			continue;
  		}
f7ee1f13d   Christophe Leroy   mm/vmalloc: enabl...
171
  		if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask))
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  			return -ENOMEM;
  	} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
  	return 0;
  }
  
  static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift)
  {
  	if (max_page_shift < PUD_SHIFT)
  		return 0;
  
  	if (!arch_vmap_pud_supported(prot))
  		return 0;
  
  	if ((end - addr) != PUD_SIZE)
  		return 0;
  
  	if (!IS_ALIGNED(addr, PUD_SIZE))
  		return 0;
  
  	if (!IS_ALIGNED(phys_addr, PUD_SIZE))
  		return 0;
  
  	if (pud_present(*pud) && !pud_free_pmd_page(pud, addr))
  		return 0;
  
  	return pud_set_huge(pud, phys_addr, prot);
  }
  
  static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift, pgtbl_mod_mask *mask)
  {
  	pud_t *pud;
  	unsigned long next;
  
  	pud = pud_alloc_track(&init_mm, p4d, addr, mask);
  	if (!pud)
  		return -ENOMEM;
  	do {
  		next = pud_addr_end(addr, end);
  
  		if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot,
  					max_page_shift)) {
  			*mask |= PGTBL_PUD_MODIFIED;
  			continue;
  		}
  
  		if (vmap_pmd_range(pud, addr, next, phys_addr, prot,
  					max_page_shift, mask))
  			return -ENOMEM;
  	} while (pud++, phys_addr += (next - addr), addr = next, addr != end);
  	return 0;
  }
  
  static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift)
  {
  	if (max_page_shift < P4D_SHIFT)
  		return 0;
  
  	if (!arch_vmap_p4d_supported(prot))
  		return 0;
  
  	if ((end - addr) != P4D_SIZE)
  		return 0;
  
  	if (!IS_ALIGNED(addr, P4D_SIZE))
  		return 0;
  
  	if (!IS_ALIGNED(phys_addr, P4D_SIZE))
  		return 0;
  
  	if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr))
  		return 0;
  
  	return p4d_set_huge(p4d, phys_addr, prot);
  }
  
  static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift, pgtbl_mod_mask *mask)
  {
  	p4d_t *p4d;
  	unsigned long next;
  
  	p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
  	if (!p4d)
  		return -ENOMEM;
  	do {
  		next = p4d_addr_end(addr, end);
  
  		if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot,
  					max_page_shift)) {
  			*mask |= PGTBL_P4D_MODIFIED;
  			continue;
  		}
  
  		if (vmap_pud_range(p4d, addr, next, phys_addr, prot,
  					max_page_shift, mask))
  			return -ENOMEM;
  	} while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
  	return 0;
  }
5d87510de   Nicholas Piggin   mm/vmalloc: add v...
278
  static int vmap_range_noflush(unsigned long addr, unsigned long end,
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  			phys_addr_t phys_addr, pgprot_t prot,
  			unsigned int max_page_shift)
  {
  	pgd_t *pgd;
  	unsigned long start;
  	unsigned long next;
  	int err;
  	pgtbl_mod_mask mask = 0;
  
  	might_sleep();
  	BUG_ON(addr >= end);
  
  	start = addr;
  	pgd = pgd_offset_k(addr);
  	do {
  		next = pgd_addr_end(addr, end);
  		err = vmap_p4d_range(pgd, addr, next, phys_addr, prot,
  					max_page_shift, &mask);
  		if (err)
  			break;
  	} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
5e9e3d777   Nicholas Piggin   mm: move vmap_ran...
  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
  
  	return err;
  }
b221385bc   Adrian Bunk   [PATCH] mm/: make...
305

82a70ce04   Christoph Hellwig   mm: move ioremap_...
  int ioremap_page_range(unsigned long addr, unsigned long end,
  		phys_addr_t phys_addr, pgprot_t prot)
5d87510de   Nicholas Piggin   mm/vmalloc: add v...
  {
  	int err;
319e703c7   Peng Fan   LF-4658 Revert "m...
310
  	err = vmap_range_noflush(addr, end, phys_addr, prot,
82a70ce04   Christoph Hellwig   mm: move ioremap_...
311
  				 ioremap_max_page_shift);
5d87510de   Nicholas Piggin   mm/vmalloc: add v...
312
  	flush_cache_vmap(addr, end);
5d87510de   Nicholas Piggin   mm/vmalloc: add v...
  	return err;
  }
c34c51708   Jan Kiszka   mm: Re-export ior...
315
  EXPORT_SYMBOL_GPL(ioremap_page_range);
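  /*
   * Editor's note: a simplified sketch (not from this file, name is
   * hypothetical) of how a generic ioremap() can be built on top of
   * ioremap_page_range(): reserve a VM_IOREMAP area, then map the
   * physical range into it. Page alignment of size/offset and other
   * error handling are omitted for brevity.
   */
  static void __iomem *example_ioremap(phys_addr_t phys_addr, size_t size,
  				     pgprot_t prot)
  {
  	struct vm_struct *area;
  	unsigned long vaddr;
  
  	area = get_vm_area(size, VM_IOREMAP);
  	if (!area)
  		return NULL;
  	vaddr = (unsigned long)area->addr;
  
  	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
  		free_vm_area(area);
  		return NULL;
  	}
  
  	return (void __iomem *)vaddr;
  }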
5d87510de   Nicholas Piggin   mm/vmalloc: add v...
316

2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  {
  	pte_t *pte;
  
  	pte = pte_offset_kernel(pmd, addr);
  	do {
  		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
  		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
  	} while (pte++, addr += PAGE_SIZE, addr != end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
327
  	*mask |= PGTBL_PTE_MODIFIED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
328
  }
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  {
  	pmd_t *pmd;
  	unsigned long next;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
334
  	int cleared;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		next = pmd_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  
  		cleared = pmd_clear_huge(pmd);
  		if (cleared || pmd_bad(*pmd))
  			*mask |= PGTBL_PMD_MODIFIED;
  
  		if (cleared)
b9820d8f3   Toshi Kani   mm: change vunmap...
345
  			continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  		if (pmd_none_or_clear_bad(pmd))
  			continue;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
348
  		vunmap_pte_range(pmd, addr, next, mask);
e47110e90   Aneesh Kumar K.V   mm/vunmap: add co...
  
  		cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	} while (pmd++, addr = next, addr != end);
  }
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  {
  	pud_t *pud;
  	unsigned long next;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
358
  	int cleared;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
359

c2febafc6   Kirill A. Shutemov   mm: convert gener...
360
  	pud = pud_offset(p4d, addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	do {
  		next = pud_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  
  		cleared = pud_clear_huge(pud);
  		if (cleared || pud_bad(*pud))
  			*mask |= PGTBL_PUD_MODIFIED;
  
  		if (cleared)
b9820d8f3   Toshi Kani   mm: change vunmap...
369
  			continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  		if (pud_none_or_clear_bad(pud))
  			continue;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
372
  		vunmap_pmd_range(pud, addr, next, mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	} while (pud++, addr = next, addr != end);
  }
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  {
  	p4d_t *p4d;
  	unsigned long next;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
380
  	int cleared;
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  
  	p4d = p4d_offset(pgd, addr);
  	do {
  		next = p4d_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  
  		cleared = p4d_clear_huge(p4d);
  		if (cleared || p4d_bad(*p4d))
  			*mask |= PGTBL_P4D_MODIFIED;
  
  		if (cleared)
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  			continue;
  		if (p4d_none_or_clear_bad(p4d))
  			continue;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
394
  		vunmap_pud_range(p4d, addr, next, mask);
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  	} while (p4d++, addr = next, addr != end);
  }
4ad0ae8c6   Nicholas Piggin   mm/vmalloc: remov...
  /*
   * vunmap_range_noflush is similar to vunmap_range, but does not
   * flush caches or TLBs.
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
400
   *
4ad0ae8c6   Nicholas Piggin   mm/vmalloc: remov...
   * The caller is responsible for calling flush_cache_vmap() before calling
   * this function, and flush_tlb_kernel_range after it has returned
   * successfully (and before the addresses are expected to cause a page fault
   * or be re-mapped for something else, if TLB flushes are being delayed or
   * coalesced).
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
406
   *
4ad0ae8c6   Nicholas Piggin   mm/vmalloc: remov...
407
   * This is an internal function only. Do not use outside mm/.
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
408
   */
4ad0ae8c6   Nicholas Piggin   mm/vmalloc: remov...
409
  void vunmap_range_noflush(unsigned long start, unsigned long end)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
410
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
411
  	unsigned long next;
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
412
  	pgd_t *pgd;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  	unsigned long addr = start;
  	pgtbl_mod_mask mask = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	do {
  		next = pgd_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  		if (pgd_bad(*pgd))
  			mask |= PGTBL_PGD_MODIFIED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  		if (pgd_none_or_clear_bad(pgd))
  			continue;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
424
  		vunmap_p4d_range(pgd, addr, next, &mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
  	} while (pgd++, addr = next, addr != end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  
  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
429
  }
4ad0ae8c6   Nicholas Piggin   mm/vmalloc: remov...
  /**
   * vunmap_range - unmap kernel virtual addresses
   * @addr: start of the VM area to unmap
   * @end: end of the VM area to unmap (non-inclusive)
   *
   * Clears any present PTEs in the virtual address range, flushes TLBs and
   * caches. Any subsequent access to the address before it has been re-mapped
   * is a kernel bug.
   */
  void vunmap_range(unsigned long addr, unsigned long end)
  {
  	flush_cache_vunmap(addr, end);
  	vunmap_range_noflush(addr, end);
  	flush_tlb_kernel_range(addr, end);
  }
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
445
  static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  {
  	pte_t *pte;
db64fe022   Nick Piggin   mm: rewrite vmap ...
  	/*
  	 * nr is a running index into the array which helps higher level
  	 * callers keep track of where we're up to.
  	 */
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
454
  	pte = pte_alloc_kernel_track(pmd, addr, mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	if (!pte)
  		return -ENOMEM;
  	do {
db64fe022   Nick Piggin   mm: rewrite vmap ...
  		struct page *page = pages[*nr];
  
  		if (WARN_ON(!pte_none(*pte)))
  			return -EBUSY;
  		if (WARN_ON(!page))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  			return -ENOMEM;
  		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
db64fe022   Nick Piggin   mm: rewrite vmap ...
465
  		(*nr)++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
466
  	} while (pte++, addr += PAGE_SIZE, addr != end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
467
  	*mask |= PGTBL_PTE_MODIFIED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	return 0;
  }
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
470
  static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  {
  	pmd_t *pmd;
  	unsigned long next;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
476
  	pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	if (!pmd)
  		return -ENOMEM;
  	do {
  		next = pmd_addr_end(addr, end);
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
481
  		if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  			return -ENOMEM;
  	} while (pmd++, addr = next, addr != end);
  	return 0;
  }
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
486
  static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  {
  	pud_t *pud;
  	unsigned long next;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
492
  	pud = pud_alloc_track(&init_mm, p4d, addr, mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	if (!pud)
  		return -ENOMEM;
  	do {
  		next = pud_addr_end(addr, end);
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
497
  		if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  			return -ENOMEM;
  	} while (pud++, addr = next, addr != end);
  	return 0;
  }
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
502
  static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  {
  	p4d_t *p4d;
  	unsigned long next;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
508
  	p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  	if (!p4d)
  		return -ENOMEM;
  	do {
  		next = p4d_addr_end(addr, end);
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
513
  		if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  			return -ENOMEM;
  	} while (p4d++, addr = next, addr != end);
  	return 0;
  }
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
  static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
  		pgprot_t prot, struct page **pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
520
  {
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
521
  	unsigned long start = addr;
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
522
  	pgd_t *pgd;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
523
  	unsigned long next;
db64fe022   Nick Piggin   mm: rewrite vmap ...
  	int err = 0;
  	int nr = 0;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
526
  	pgtbl_mod_mask mask = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
  	do {
  		next = pgd_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  		if (pgd_bad(*pgd))
  			mask |= PGTBL_PGD_MODIFIED;
0a2648840   Nicholas Piggin   mm/vmalloc: renam...
534
  		err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
535
  		if (err)
bf88c8c83   Figo.zhang   vmalloc.c: fix do...
536
  			return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
  	} while (pgd++, addr = next, addr != end);
db64fe022   Nick Piggin   mm: rewrite vmap ...
538

2ba3e6947   Joerg Roedel   mm/vmalloc: track...
  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
60bb44652   Christoph Hellwig   mm: don't return ...
541
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542
  }
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
  /*
   * vmap_pages_range_noflush is similar to vmap_pages_range, but does not
   * flush caches.
   *
   * The caller is responsible for calling flush_cache_vmap() after this
   * function returns successfully and before the addresses are accessed.
   *
   * This is an internal function only. Do not use outside mm/.
   */
  int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
  		pgprot_t prot, struct page **pages, unsigned int page_shift)
  {
  	unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
  
  	WARN_ON(page_shift < PAGE_SHIFT);
  
  	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
  			page_shift == PAGE_SHIFT)
  		return vmap_small_pages_range_noflush(addr, end, prot, pages);
  
  	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
  		int err;
  
  		err = vmap_range_noflush(addr, addr + (1UL << page_shift),
  					__pa(page_address(pages[i])), prot,
  					page_shift);
  		if (err)
  			return err;
  
  		addr += 1UL << page_shift;
  	}
  
  	return 0;
  }
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
577
  /**
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
578
   * vmap_pages_range - map pages to a kernel virtual address
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
579
   * @addr: start of the VM area to map
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
580
   * @end: end of the VM area to map (non-inclusive)
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
581
   * @prot: page protection flags to use
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
   * @pages: pages to map (always PAGE_SIZE pages)
   * @page_shift: maximum shift that the pages may be mapped with, @pages must
   * be aligned and contiguous up to at least this shift.
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
   *
   * RETURNS:
   * 0 on success, -errno on failure.
   */
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
  static int vmap_pages_range(unsigned long addr, unsigned long end,
  		pgprot_t prot, struct page **pages, unsigned int page_shift)
8fc489850   Tejun Heo   vmalloc: add un/m...
591
  {
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
592
  	int err;
8fc489850   Tejun Heo   vmalloc: add un/m...
593

b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
  	err = vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
  	flush_cache_vmap(addr, end);
  	return err;
8fc489850   Tejun Heo   vmalloc: add un/m...
597
  }
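  /*
   * Editor's note: an illustrative sketch (not part of this file, helper
   * name is hypothetical) of the map/unmap pairing these primitives
   * provide; the public vmap()/vunmap() interfaces are built the same
   * way: reserve a virtual range, map the page array into it with
   * vmap_pages_range(), and later tear it down with vunmap_range()
   * before the range is reused.
   */
  static void *example_map_pages(struct page **pages, unsigned int count,
  			       pgprot_t prot)
  {
  	unsigned long size = (unsigned long)count << PAGE_SHIFT;
  	struct vm_struct *area;
  	unsigned long addr;
  
  	area = get_vm_area(size, VM_MAP);
  	if (!area)
  		return NULL;
  	addr = (unsigned long)area->addr;
  
  	if (vmap_pages_range(addr, addr + size, prot, pages, PAGE_SHIFT)) {
  		free_vm_area(area);
  		return NULL;
  	}
  	return area->addr;
  }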
81ac3ad90   KAMEZAWA Hiroyuki   kcore: register m...
598
  int is_vmalloc_or_module_addr(const void *x)
73bdf0a60   Linus Torvalds   Introduce is_vmal...
  {
  	/*
ab4f2ee13   Russell King   [ARM] fix naming ...
601
  	 * ARM, x86-64 and sparc64 put modules in a special place,
73bdf0a60   Linus Torvalds   Introduce is_vmal...
  	 * and fall back on vmalloc() if that fails. Others
  	 * just put it in the vmalloc space.
  	 */
  #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
  	unsigned long addr = (unsigned long)x;
  	if (addr >= MODULES_VADDR && addr < MODULES_END)
  		return 1;
  #endif
  	return is_vmalloc_addr(x);
  }
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
612
  /*
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
   * Walk a vmap address to the struct page it maps. Huge vmap mappings will
   * return the tail page that corresponds to the base page address, which
   * matches small vmap mappings.
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
616
   */
add688fbd   malc   Revert "mm/vmallo...
617
  struct page *vmalloc_to_page(const void *vmalloc_addr)
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
  {
  	unsigned long addr = (unsigned long) vmalloc_addr;
add688fbd   malc   Revert "mm/vmallo...
620
  	struct page *page = NULL;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
621
  	pgd_t *pgd = pgd_offset_k(addr);
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  	p4d_t *p4d;
  	pud_t *pud;
  	pmd_t *pmd;
  	pte_t *ptep, pte;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
626

7aa413def   Ingo Molnar   x86, MM: virtual ...
  	/*
  	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
  	 * architectures that do not vmalloc module space
  	 */
73bdf0a60   Linus Torvalds   Introduce is_vmal...
631
  	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
59ea74633   Jiri Slaby   MM: virtual addre...
632

c2febafc6   Kirill A. Shutemov   mm: convert gener...
  	if (pgd_none(*pgd))
  		return NULL;
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
  	if (WARN_ON_ONCE(pgd_leaf(*pgd)))
  		return NULL; /* XXX: no allowance for huge pgd */
  	if (WARN_ON_ONCE(pgd_bad(*pgd)))
  		return NULL;
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  	p4d = p4d_offset(pgd, addr);
  	if (p4d_none(*p4d))
  		return NULL;
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
  	if (p4d_leaf(*p4d))
  		return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT);
  	if (WARN_ON_ONCE(p4d_bad(*p4d)))
  		return NULL;
029c54b09   Ard Biesheuvel   mm/vmalloc.c: hug...
646

c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
  	pud = pud_offset(p4d, addr);
  	if (pud_none(*pud))
  		return NULL;
  	if (pud_leaf(*pud))
  		return pud_page(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
  	if (WARN_ON_ONCE(pud_bad(*pud)))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
653
  		return NULL;
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
654

c2febafc6   Kirill A. Shutemov   mm: convert gener...
655
  	pmd = pmd_offset(pud, addr);
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
  	if (pmd_none(*pmd))
  		return NULL;
  	if (pmd_leaf(*pmd))
  		return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
  	if (WARN_ON_ONCE(pmd_bad(*pmd)))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
  		return NULL;
  
  	ptep = pte_offset_map(pmd, addr);
  	pte = *ptep;
  	if (pte_present(pte))
  		page = pte_page(pte);
  	pte_unmap(ptep);
c0eb315ad   Nicholas Piggin   mm/vmalloc: fix H...
668

add688fbd   malc   Revert "mm/vmallo...
669
  	return page;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
670
  }
add688fbd   malc   Revert "mm/vmallo...
671
  EXPORT_SYMBOL(vmalloc_to_page);
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
  
  /*
add688fbd   malc   Revert "mm/vmallo...
674
   * Map a vmalloc()-space virtual address to the physical page frame number.
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
675
   */
add688fbd   malc   Revert "mm/vmallo...
676
  unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
677
  {
add688fbd   malc   Revert "mm/vmallo...
678
  	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
679
  }
add688fbd   malc   Revert "mm/vmallo...
680
  EXPORT_SYMBOL(vmalloc_to_pfn);
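  /*
   * Editor's note: an illustrative sketch (not part of this file, helper
   * name is hypothetical) of a common use of vmalloc_to_page():
   * collecting the struct page backing each PAGE_SIZE chunk of a
   * vmalloc()'ed buffer, e.g. to build a scatterlist for DMA. Assumes
   * "buf" really is a page-aligned vmalloc address.
   */
  static int example_collect_pages(void *buf, size_t size, struct page **pages)
  {
  	unsigned long i, nr = size >> PAGE_SHIFT;
  
  	for (i = 0; i < nr; i++) {
  		pages[i] = vmalloc_to_page(buf + i * PAGE_SIZE);
  		if (!pages[i])
  			return -EINVAL;
  	}
  	return 0;
  }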
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
681

db64fe022   Nick Piggin   mm: rewrite vmap ...
  
  /*** Global kva allocator ***/
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
684
  #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
685
  #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
686

db64fe022   Nick Piggin   mm: rewrite vmap ...
687

db64fe022   Nick Piggin   mm: rewrite vmap ...
688
  static DEFINE_SPINLOCK(vmap_area_lock);
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
689
  static DEFINE_SPINLOCK(free_vmap_area_lock);
f1c4069e1   Joonsoo Kim   mm, vmalloc: expo...
  /* Export for kexec only */
  LIST_HEAD(vmap_area_list);
89699605f   Nick Piggin   mm: vmap area cache
692
  static struct rb_root vmap_area_root = RB_ROOT;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
693
  static bool vmap_initialized __read_mostly;
89699605f   Nick Piggin   mm: vmap area cache
694

96e2db456   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
  static struct rb_root purge_vmap_area_root = RB_ROOT;
  static LIST_HEAD(purge_vmap_area_list);
  static DEFINE_SPINLOCK(purge_vmap_area_lock);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  /*
   * This kmem_cache is used for vmap_area objects. Instead of
   * allocating from slab we reuse an object from this cache to
   * make things faster. Especially in "no edge" splitting of
   * free block.
   */
  static struct kmem_cache *vmap_area_cachep;
  
  /*
   * This linked list is used in pair with free_vmap_area_root.
   * It gives O(1) access to prev/next to perform fast coalescing.
   */
  static LIST_HEAD(free_vmap_area_list);
  
  /*
   * This augment red-black tree represents the free vmap space.
   * All vmap_area objects in this tree are sorted by va->va_start
   * address. It is used for allocation and merging when a vmap
   * object is released.
   *
   * Each vmap_area node contains a maximum available free block
   * of its sub-tree, right or left. Therefore it is possible to
   * find a lowest match of free area.
   */
  static struct rb_root free_vmap_area_root = RB_ROOT;
82dd23e84   Uladzislau Rezki (Sony)   mm/vmalloc.c: pre...
  /*
   * Preload a CPU with one object for "no edge" split case. The
   * aim is to get rid of allocations from the atomic context, thus
   * to use more permissive allocation masks.
   */
  static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  static __always_inline unsigned long
  va_size(struct vmap_area *va)
  {
  	return (va->va_end - va->va_start);
  }
  
  static __always_inline unsigned long
  get_subtree_max_size(struct rb_node *node)
  {
  	struct vmap_area *va;
  
  	va = rb_entry_safe(node, struct vmap_area, rb_node);
  	return va ? va->subtree_max_size : 0;
  }
89699605f   Nick Piggin   mm: vmap area cache
743

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  /*
   * Gets called when remove the node and rotate.
   */
  static __always_inline unsigned long
  compute_subtree_max_size(struct vmap_area *va)
  {
  	return max3(va_size(va),
  		get_subtree_max_size(va->rb_node.rb_left),
  		get_subtree_max_size(va->rb_node.rb_right));
  }
315cc066b   Michel Lespinasse   augmented rbtree:...
  RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
  	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  
  static void purge_vmap_area_lazy(void);
  static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
  static unsigned long lazy_max_pages(void);
db64fe022   Nick Piggin   mm: rewrite vmap ...
760

97105f0ab   Roman Gushchin   mm: vmalloc: show...
  static atomic_long_t nr_vmalloc_pages;
  
  unsigned long vmalloc_nr_pages(void)
  {
  	return atomic_long_read(&nr_vmalloc_pages);
  }
f181234a5   Chen Wandun   mm/vmalloc: fix w...
  static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
  {
  	struct vmap_area *va = NULL;
  	struct rb_node *n = vmap_area_root.rb_node;
  
  	while (n) {
  		struct vmap_area *tmp;
  
  		tmp = rb_entry(n, struct vmap_area, rb_node);
  		if (tmp->va_end > addr) {
  			va = tmp;
  			if (tmp->va_start <= addr)
  				break;
  
  			n = n->rb_left;
  		} else
  			n = n->rb_right;
  	}
  
  	return va;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
788
  static struct vmap_area *__find_vmap_area(unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
789
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
  	struct rb_node *n = vmap_area_root.rb_node;
  
  	while (n) {
  		struct vmap_area *va;
  
  		va = rb_entry(n, struct vmap_area, rb_node);
  		if (addr < va->va_start)
  			n = n->rb_left;
cef2ac3f6   HATAYAMA Daisuke   vmalloc: make fin...
798
  		else if (addr >= va->va_end)
db64fe022   Nick Piggin   mm: rewrite vmap ...
  			n = n->rb_right;
  		else
  			return va;
  	}
  
  	return NULL;
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  /*
   * This function returns back addresses of parent node
   * and its left or right link for further processing.
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
   *
   * Otherwise NULL is returned. In that case all further
   * steps regarding inserting of conflicting overlap range
   * have to be declined and actually considered as a bug.
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
   */
  static __always_inline struct rb_node **
  find_va_links(struct vmap_area *va,
  	struct rb_root *root, struct rb_node *from,
  	struct rb_node **parent)
  {
  	struct vmap_area *tmp_va;
  	struct rb_node **link;
  
  	if (root) {
  		link = &root->rb_node;
  		if (unlikely(!*link)) {
  			*parent = NULL;
  			return link;
  		}
  	} else {
  		link = &from;
  	}
db64fe022   Nick Piggin   mm: rewrite vmap ...
831

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  	/*
  	 * Go to the bottom of the tree. When we hit the last point
  	 * we end up with the parent rb_node and the correct direction, named
  	 * "link" here, where the new va->rb_node will be attached to.
  	 */
  	do {
  		tmp_va = rb_entry(*link, struct vmap_area, rb_node);
db64fe022   Nick Piggin   mm: rewrite vmap ...
839

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  		/*
  		 * During the traversal we also do some sanity check.
  		 * Trigger the BUG() if there are sides(left/right)
  		 * or full overlaps.
  		 */
  		if (va->va_start < tmp_va->va_end &&
  				va->va_end <= tmp_va->va_start)
  			link = &(*link)->rb_left;
  		else if (va->va_end > tmp_va->va_start &&
  				va->va_start >= tmp_va->va_end)
  			link = &(*link)->rb_right;
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
  		else {
  			WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
  				va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
  
  			return NULL;
  		}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  	} while (*link);
  
  	*parent = &tmp_va->rb_node;
  	return link;
  }
  
  static __always_inline struct list_head *
  get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
  {
  	struct list_head *list;
  
  	if (unlikely(!parent))
  		/*
  		 * The red-black tree where we try to find VA neighbors
  		 * before merging or inserting is empty, i.e. it means
  		 * there is no free vmap space. Normally it does not
  		 * happen but we handle this case anyway.
  		 */
  		return NULL;
  
  	list = &rb_entry(parent, struct vmap_area, rb_node)->list;
  	return (&parent->rb_right == link ? list->next : list);
  }
  
  static __always_inline void
  link_va(struct vmap_area *va, struct rb_root *root,
  	struct rb_node *parent, struct rb_node **link, struct list_head *head)
  {
  	/*
  	 * VA is still not in the list, but we can
  	 * identify its future previous list_head node.
  	 */
  	if (likely(parent)) {
  		head = &rb_entry(parent, struct vmap_area, rb_node)->list;
  		if (&parent->rb_right != link)
  			head = head->prev;
db64fe022   Nick Piggin   mm: rewrite vmap ...
894
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  	/* Insert to the rb-tree */
  	rb_link_node(&va->rb_node, parent, link);
  	if (root == &free_vmap_area_root) {
  		/*
  		 * Some explanation here. Just perform simple insertion
  		 * to the tree. We do not set va->subtree_max_size to
  		 * its current size before calling rb_insert_augmented().
  		 * This is because we populate the tree from the bottom
  		 * up to parent levels only when the node _is_ in the tree.
  		 *
  		 * Therefore we set subtree_max_size to zero after insertion,
  		 * to let __augment_tree_propagate_from() put everything in
  		 * the correct order later on.
  		 */
  		rb_insert_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  		va->subtree_max_size = 0;
  	} else {
  		rb_insert_color(&va->rb_node, root);
  	}
db64fe022   Nick Piggin   mm: rewrite vmap ...
915

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  	/* Address-sort this list */
  	list_add(&va->list, head);
db64fe022   Nick Piggin   mm: rewrite vmap ...
918
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  static __always_inline void
  unlink_va(struct vmap_area *va, struct rb_root *root)
  {
460e42d19   Uladzislau Rezki (Sony)   mm/vmalloc.c: swi...
  	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
  		return;
db64fe022   Nick Piggin   mm: rewrite vmap ...
924

460e42d19   Uladzislau Rezki (Sony)   mm/vmalloc.c: swi...
  	if (root == &free_vmap_area_root)
  		rb_erase_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  	else
  		rb_erase(&va->rb_node, root);
  
  	list_del(&va->list);
  	RB_CLEAR_NODE(&va->rb_node);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
933
  }
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
  static void
da27c9ed1   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
936
  augment_tree_propagate_check(void)
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
  {
  	struct vmap_area *va;
da27c9ed1   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
939
  	unsigned long computed_size;
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
940

da27c9ed1   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
  	list_for_each_entry(va, &free_vmap_area_list, list) {
  		computed_size = compute_subtree_max_size(va);
  		if (computed_size != va->subtree_max_size)
  			pr_emerg("tree is corrupted: %lu, %lu\n",
  				va_size(va), va->subtree_max_size);
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
947
  	}
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
  }
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  /*
   * This function populates subtree_max_size from bottom to upper
   * levels starting from VA point. The propagation must be done
   * when VA size is modified by changing its va_start/va_end. Or
   * in case of newly inserting of VA to the tree.
   *
   * It means that __augment_tree_propagate_from() must be called:
   * - After VA has been inserted to the tree(free path);
   * - After VA has been shrunk(allocation path);
   * - After VA has been increased(merging path).
   *
   * Please note that, it does not mean that upper parent nodes
   * and their subtree_max_size are recalculated all the time up
   * to the root node.
   *
   *       4--8
   *        /\
   *       /  \
   *      /    \
   *    2--2  8--8
   *
   * For example, if we modify node 4, shrinking it to 2, then
   * no modification is required. If we shrink node 2 to 1,
   * only its subtree_max_size is updated, and set to 1. If we shrink
   * the node 8 to 6, then its subtree_max_size is set to 6 and parent
   * node becomes 4--6.
   */
  static __always_inline void
  augment_tree_propagate_from(struct vmap_area *va)
  {
15ae144f7   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
  	/*
  	 * Populate the tree from bottom towards the root until
  	 * the calculated maximum available size of checked node
  	 * is equal to its current one.
  	 */
  	free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
  
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
da27c9ed1   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
988
  	augment_tree_propagate_check();
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
989
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  }
  
  static void
  insert_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct rb_node **link;
  	struct rb_node *parent;
  
  	link = find_va_links(va, root, NULL, &parent);
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
  	if (link)
  		link_va(va, root, parent, link, head);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  }
  
  static void
  insert_vmap_area_augment(struct vmap_area *va,
  	struct rb_node *from, struct rb_root *root,
  	struct list_head *head)
  {
  	struct rb_node **link;
  	struct rb_node *parent;
  
  	if (from)
  		link = find_va_links(va, NULL, from, &parent);
  	else
  		link = find_va_links(va, root, NULL, &parent);
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
  	if (link) {
  		link_va(va, root, parent, link, head);
  		augment_tree_propagate_from(va);
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  }
  
  /*
   * Merge de-allocated chunk of VA memory with previous
   * and next free blocks. If coalesce is not done a new
   * free area is inserted. If VA has been merged, it is
   * freed.
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
   *
   * Please note, it can return NULL in case of overlapping
   * ranges, followed by a WARN() report. Even though this is
   * buggy behaviour, the system can stay alive and keep
   * going.
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1032
   */
3c5c3cfb9   Daniel Axtens   kasan: support ba...
1033
  static __always_inline struct vmap_area *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  merge_or_add_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct vmap_area *sibling;
  	struct list_head *next;
  	struct rb_node **link;
  	struct rb_node *parent;
  	bool merged = false;
  
  	/*
  	 * Find a place in the tree where VA potentially will be
  	 * inserted, unless it is merged with its sibling/siblings.
  	 */
  	link = find_va_links(va, root, NULL, &parent);
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
  	if (!link)
  		return NULL;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  
  	/*
  	 * Get next node of VA to check if merging can be done.
  	 */
  	next = get_va_next_sibling(parent, link);
  	if (unlikely(next == NULL))
  		goto insert;
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<------VA------>|<-----Next----->|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next != head) {
  		sibling = list_entry(next, struct vmap_area, list);
  		if (sibling->va_start == va->va_end) {
  			sibling->va_start = va->va_start;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
  		}
  	}
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<-----Prev----->|<------VA------>|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next->prev != head) {
  		sibling = list_entry(next->prev, struct vmap_area, list);
  		if (sibling->va_end == va->va_start) {
5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
  			/*
  			 * If both neighbors are coalesced, it is important
  			 * to unlink the "next" node first, followed by merging
  			 * with "previous" one. Otherwise the tree might not be
  			 * fully populated if a sibling's augmented value is
  			 * "normalized" because of rotation operations.
  			 */
54f63d9d8   Uladzislau Rezki (Sony)   mm/vmalloc.c: get...
  			if (merged)
  				unlink_va(va, root);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1097

5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
1098
  			sibling->va_end = va->va_end;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
3c5c3cfb9   Daniel Axtens   kasan: support ba...
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  		}
  	}
  
  insert:
5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
1109
  	if (!merged)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1110
  		link_va(va, root, parent, link, head);
3c5c3cfb9   Daniel Axtens   kasan: support ba...
1111

96e2db456   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
  	return va;
  }
  
  static __always_inline struct vmap_area *
  merge_or_add_vmap_area_augment(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	va = merge_or_add_vmap_area(va, root, head);
  	if (va)
  		augment_tree_propagate_from(va);
3c5c3cfb9   Daniel Axtens   kasan: support ba...
1122
  	return va;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  }
  
  static __always_inline bool
  is_within_this_va(struct vmap_area *va, unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	unsigned long nva_start_addr;
  
  	if (va->va_start > vstart)
  		nva_start_addr = ALIGN(va->va_start, align);
  	else
  		nva_start_addr = ALIGN(vstart, align);
  
  	/* Can be overflowed due to big size or alignment. */
  	if (nva_start_addr + size < nva_start_addr ||
  			nva_start_addr < vstart)
  		return false;
  
  	return (nva_start_addr + size <= va->va_end);
  }
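  /*
   * Editor's note, a worked example for the check above (illustrative,
   * not part of this file): with a free block va = [0x100000, 0x200000),
   * align = 0x10000 and vstart = 0x104000, the aligned start is
   * ALIGN(0x104000, 0x10000) = 0x110000, so a request of size 0xf0000
   * fits exactly (0x110000 + 0xf0000 == va->va_end), while 0xf1000
   * does not.
   */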
  
  /*
   * Find the first free block(lowest start address) in the tree,
   * that will accomplish the request corresponding to passing
   * parameters.
   */
  static __always_inline struct vmap_area *
  find_vmap_lowest_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;
  	struct rb_node *node;
  	unsigned long length;
  
  	/* Start from the root. */
  	node = free_vmap_area_root.rb_node;
  
  	/* Adjust the search size for alignment overhead. */
  	length = size + align - 1;
  
  	while (node) {
  		va = rb_entry(node, struct vmap_area, rb_node);
  
  		if (get_subtree_max_size(node->rb_left) >= length &&
  				vstart < va->va_start) {
  			node = node->rb_left;
  		} else {
  			if (is_within_this_va(va, size, align, vstart))
  				return va;
  
  			/*
  			 * Does not make sense to go deeper towards the right
  			 * sub-tree if it does not have a free block that is
  			 * equal or bigger to the requested search length.
  			 */
  			if (get_subtree_max_size(node->rb_right) >= length) {
  				node = node->rb_right;
  				continue;
  			}
  
  			/*
3806b0414   Andrew Morton   mm/vmalloc.c: fix...
1184
  			 * OK. We roll back and find the first right sub-tree,
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  			 * that will satisfy the search criteria. It can happen
  			 * only once due to "vstart" restriction.
  			 */
  			while ((node = rb_parent(node))) {
  				va = rb_entry(node, struct vmap_area, rb_node);
  				if (is_within_this_va(va, size, align, vstart))
  					return va;
  
  				if (get_subtree_max_size(node->rb_right) >= length &&
  						vstart <= va->va_start) {
  					node = node->rb_right;
  					break;
  				}
  			}
  		}
  	}
  
  	return NULL;
  }
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  #include <linux/random.h>
  
  static struct vmap_area *
  find_vmap_lowest_linear_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;
  
  	list_for_each_entry(va, &free_vmap_area_list, list) {
  		if (!is_within_this_va(va, size, align, vstart))
  			continue;
  
  		return va;
  	}
  
  	return NULL;
  }
  
  static void
  find_vmap_lowest_match_check(unsigned long size)
  {
  	struct vmap_area *va_1, *va_2;
  	unsigned long vstart;
  	unsigned int rnd;
  
  	get_random_bytes(&rnd, sizeof(rnd));
  	vstart = VMALLOC_START + rnd;
  
  	va_1 = find_vmap_lowest_match(size, 1, vstart);
  	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
  
  	if (va_1 != va_2)
  		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
  			va_1, va_2, vstart);
  }
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
  enum fit_type {
  	NOTHING_FIT = 0,
  	FL_FIT_TYPE = 1,	/* full fit */
  	LE_FIT_TYPE = 2,	/* left edge fit */
  	RE_FIT_TYPE = 3,	/* right edge fit */
  	NE_FIT_TYPE = 4		/* no edge fit */
  };
  
  static __always_inline enum fit_type
  classify_va_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size)
  {
  	enum fit_type type;
  
  	/* Check if it is within VA. */
  	if (nva_start_addr < va->va_start ||
  			nva_start_addr + size > va->va_end)
  		return NOTHING_FIT;
  
  	/* Now classify. */
  	if (va->va_start == nva_start_addr) {
  		if (va->va_end == nva_start_addr + size)
  			type = FL_FIT_TYPE;
  		else
  			type = LE_FIT_TYPE;
  	} else if (va->va_end == nva_start_addr + size) {
  		type = RE_FIT_TYPE;
  	} else {
  		type = NE_FIT_TYPE;
  	}
  
  	return type;
  }
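/*
 * Carve the requested region out of the free "va" according to the fit
 * type: unlink and free it, shrink it, or split it, then re-propagate the
 * augmented subtree sizes. Returns 0 on success, -1 if the extra node
 * needed for an NE_FIT_TYPE split cannot be allocated.
 */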
  
  static __always_inline int
  adjust_va_to_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size,
  	enum fit_type type)
  {
  	struct vmap_area *lva = NULL;
  
  	if (type == FL_FIT_TYPE) {
  		/*
  		 * No need to split VA, it fully fits.
  		 *
  		 * |               |
  		 * V      NVA      V
  		 * |---------------|
  		 */
  		unlink_va(va, &free_vmap_area_root);
  		kmem_cache_free(vmap_area_cachep, va);
  	} else if (type == LE_FIT_TYPE) {
  		/*
  		 * Split left edge of fit VA.
  		 *
  		 * |       |
  		 * V  NVA  V   R
  		 * |-------|-------|
  		 */
  		va->va_start += size;
  	} else if (type == RE_FIT_TYPE) {
  		/*
  		 * Split right edge of fit VA.
  		 *
  		 *         |       |
  		 *     L   V  NVA  V
  		 * |-------|-------|
  		 */
  		va->va_end = nva_start_addr;
  	} else if (type == NE_FIT_TYPE) {
  		/*
  		 * Split no edge of fit VA.
  		 *
  		 *     |       |
  		 *   L V  NVA  V R
  		 * |---|-------|---|
  		 */
  		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
  		if (unlikely(!lva)) {
  			/*
  			 * For percpu allocator we do not do any pre-allocation
  			 * and leave it as it is. The reason is it most likely
  			 * never ends up with NE_FIT_TYPE splitting. In case of
  			 * percpu allocations offsets and sizes are aligned to
  			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
  			 * are its main fitting cases.
  			 *
			 * There are a few exceptions though: for example, the
			 * first allocation (early boot-up), when we have "one"
			 * big free space that has to be split.
  			 *
  			 * Also we can hit this path in case of regular "vmap"
  			 * allocations, if "this" current CPU was not preloaded.
  			 * See the comment in alloc_vmap_area() why. If so, then
  			 * GFP_NOWAIT is used instead to get an extra object for
			 * split purpose. That is rare and most of the time it
			 * does not occur.
			 *
			 * What happens if an allocation fails? Basically,
  			 * an "overflow" path is triggered to purge lazily freed
  			 * areas to free some memory, then, the "retry" path is
  			 * triggered to repeat one more time. See more details
  			 * in alloc_vmap_area() function.
  			 */
  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!lva)
  				return -1;
  		}
  
  		/*
  		 * Build the remainder.
  		 */
  		lva->va_start = va->va_start;
  		lva->va_end = nva_start_addr;
  
  		/*
  		 * Shrink this VA to remaining size.
  		 */
  		va->va_start = nva_start_addr + size;
  	} else {
  		return -1;
  	}
  
  	if (type != FL_FIT_TYPE) {
  		augment_tree_propagate_from(va);
  		if (lva)	/* type == NE_FIT_TYPE */
  			insert_vmap_area_augment(lva, &va->rb_node,
  				&free_vmap_area_root, &free_vmap_area_list);
  	}
  
  	return 0;
  }
  
  /*
 * Returns a start address of the newly allocated area on success.
 * Otherwise "vend" is returned to indicate failure.
   */
  static __always_inline unsigned long
  __alloc_vmap_area(unsigned long size, unsigned long align,
  	unsigned long vstart, unsigned long vend)
  {
  	unsigned long nva_start_addr;
  	struct vmap_area *va;
  	enum fit_type type;
  	int ret;
  
  	va = find_vmap_lowest_match(size, align, vstart);
  	if (unlikely(!va))
  		return vend;
  
  	if (va->va_start > vstart)
  		nva_start_addr = ALIGN(va->va_start, align);
  	else
  		nva_start_addr = ALIGN(vstart, align);
  
  	/* Check the "vend" restriction. */
  	if (nva_start_addr + size > vend)
  		return vend;
  
  	/* Classify what we have found. */
  	type = classify_va_fit_type(va, nva_start_addr, size);
  	if (WARN_ON_ONCE(type == NOTHING_FIT))
  		return vend;
  
  	/* Update the free vmap_area. */
  	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
  	if (ret)
  		return vend;
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  	find_vmap_lowest_match_check(size);
  #endif
  	return nva_start_addr;
  }

  /*
   * Free a region of KVA allocated by alloc_vmap_area
   */
  static void free_vmap_area(struct vmap_area *va)
  {
  	/*
  	 * Remove from the busy tree/list.
  	 */
  	spin_lock(&vmap_area_lock);
  	unlink_va(va, &vmap_area_root);
  	spin_unlock(&vmap_area_lock);
  
  	/*
  	 * Insert/Merge it back to the free tree/list.
  	 */
  	spin_lock(&free_vmap_area_lock);
  	merge_or_add_vmap_area_augment(va, &free_vmap_area_root, &free_vmap_area_list);
  	spin_unlock(&free_vmap_area_lock);
  }
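
/*
 * Take the lock, first topping up this CPU's spare vmap_area object
 * (ne_fit_preload_node) in non-atomic context if it is empty.
 */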
  static inline void
  preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node)
  {
  	struct vmap_area *va = NULL;
  
  	/*
  	 * Preload this CPU with one extra vmap_area object. It is used
  	 * when fit type of free area is NE_FIT_TYPE. It guarantees that
  	 * a CPU that does an allocation is preloaded.
  	 *
  	 * We do it in non-atomic context, thus it allows us to use more
  	 * permissive allocation masks to be more stable under low memory
  	 * condition and high memory pressure.
  	 */
  	if (!this_cpu_read(ne_fit_preload_node))
  		va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
  
  	spin_lock(lock);
  
  	if (va && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, va))
  		kmem_cache_free(vmap_area_cachep, va);
  }
  /*
 * Allocate a region of KVA of the specified size and alignment, within
 * the [vstart:vend] range.
   */
  static struct vmap_area *alloc_vmap_area(unsigned long size,
  				unsigned long align,
  				unsigned long vstart, unsigned long vend,
  				int node, gfp_t gfp_mask)
  {
	struct vmap_area *va;
	unsigned long freed;
	unsigned long addr;
	int purged = 0;
	int ret;

	BUG_ON(!size);
	BUG_ON(offset_in_page(size));
	BUG_ON(!is_power_of_2(align));

	if (unlikely(!vmap_initialized))
		return ERR_PTR(-EBUSY);
	might_sleep();
	gfp_mask = gfp_mask & GFP_RECLAIM_MASK;

	va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);
	/*
	 * Only scan the relevant parts containing pointers to other objects
	 * to avoid false negatives.
	 */
	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);

retry:
	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
	addr = __alloc_vmap_area(size, align, vstart, vend);
	spin_unlock(&free_vmap_area_lock);

	/*
	 * If an allocation fails, the "vend" address is
	 * returned. Therefore trigger the overflow path.
	 */
	if (unlikely(addr == vend))
		goto overflow;

	va->va_start = addr;
	va->va_end = addr + size;
	va->vm = NULL;

	spin_lock(&vmap_area_lock);
	insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
	spin_unlock(&vmap_area_lock);
	BUG_ON(!IS_ALIGNED(va->va_start, align));
	BUG_ON(va->va_start < vstart);
	BUG_ON(va->va_end > vend);
	ret = kasan_populate_vmalloc(addr, size);
	if (ret) {
		free_vmap_area(va);
		return ERR_PTR(ret);
	}
	return va;

overflow:
	if (!purged) {
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}

	freed = 0;
	blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);

	if (freed > 0) {
		purged = 0;
		goto retry;
	}
	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
			size);

	kmem_cache_free(vmap_area_cachep, va);
	return ERR_PTR(-EBUSY);
}
  int register_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_register(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
  
  int unregister_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
  /*
   * lazy_max_pages is the maximum amount of virtual address space we gather up
   * before attempting to purge with a TLB flush.
   *
   * There is a tradeoff here: a larger number will cover more kernel page tables
   * and take slightly longer to purge, but it will linearly reduce the number of
   * global TLB flushes that must be performed. It would seem natural to scale
   * this number up linearly with the number of CPUs (because vmapping activity
   * could also scale linearly with the number of CPUs), however it is likely
   * that in practice, workloads might be constrained in other ways that mean
   * vmap activity will not scale linearly with CPUs. Also, I want to be
   * conservative and not introduce a big latency on huge systems, so go with
   * a less aggressive log scale. It will still be an improvement over the old
   * code, and it will be simple to change the scale factor if we find that it
   * becomes a problem on bigger systems.
   */
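/*
 * Illustrative example: with 16 online CPUs, fls(16) == 5, so up to
 * 5 * 32MB = 160MB of lazily freed space (40960 pages with 4KB pages)
 * can accumulate before a purge is attempted.
 */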
  static unsigned long lazy_max_pages(void)
  {
  	unsigned int log;
  
  	log = fls(num_online_cpus());
  
  	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
  }
static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);

/*
 * Serialize vmap purging.  There is no actual critical section protected
 * by this lock, but we want to avoid concurrent calls for performance
 * reasons and to make the pcpu_get_vm_areas more deterministic.
 */
static DEFINE_MUTEX(vmap_purge_lock);

/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);

#ifdef CONFIG_X86_64
/*
 * called before a call to iounmap() if the caller wants vm_area_struct's
 * immediately freed.
 */
void set_iounmap_nonlazy(void)
{
	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
}
#endif /* CONFIG_X86_64 */

/*
 * Purges all lazily-freed vmap areas.
 */
static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
{
	unsigned long resched_threshold;
	struct list_head local_pure_list;
	struct vmap_area *va, *n_va;

	lockdep_assert_held(&vmap_purge_lock);

	spin_lock(&purge_vmap_area_lock);
	purge_vmap_area_root = RB_ROOT;
	list_replace_init(&purge_vmap_area_list, &local_pure_list);
	spin_unlock(&purge_vmap_area_lock);

	if (unlikely(list_empty(&local_pure_list)))
		return false;
	start = min(start,
		list_first_entry(&local_pure_list,
			struct vmap_area, list)->va_start);

	end = max(end,
		list_last_entry(&local_pure_list,
			struct vmap_area, list)->va_end);

	flush_tlb_kernel_range(start, end);
	resched_threshold = lazy_max_pages() << 1;

	spin_lock(&free_vmap_area_lock);
	list_for_each_entry_safe(va, n_va, &local_pure_list, list) {
		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
		unsigned long orig_start = va->va_start;
		unsigned long orig_end = va->va_end;

		/*
		 * Finally insert or merge lazily-freed area. It is
		 * detached and there is no need to "unlink" it from
		 * anything.
		 */
		va = merge_or_add_vmap_area_augment(va, &free_vmap_area_root,
				&free_vmap_area_list);

		if (!va)
			continue;
		if (is_vmalloc_or_module_addr((void *)orig_start))
			kasan_release_vmalloc(orig_start, orig_end,
					      va->va_start, va->va_end);

		atomic_long_sub(nr, &vmap_lazy_nr);

		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
			cond_resched_lock(&free_vmap_area_lock);
	}
	spin_unlock(&free_vmap_area_lock);
	return true;
}
  
/*
 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
 * is already purging.
 */
static void try_purge_vmap_area_lazy(void)
{
	if (mutex_trylock(&vmap_purge_lock)) {
		__purge_vmap_area_lazy(ULONG_MAX, 0);
		mutex_unlock(&vmap_purge_lock);
	}
}

/*
 * Kick off a purge of the outstanding lazy areas.
 */
static void purge_vmap_area_lazy(void)
{
	mutex_lock(&vmap_purge_lock);
	purge_fragmented_blocks_allcpus();
	__purge_vmap_area_lazy(ULONG_MAX, 0);
	mutex_unlock(&vmap_purge_lock);
}

/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and flush_cache_vunmap has been called for the correct range
 * previously.
 */
static void free_vmap_area_noflush(struct vmap_area *va)
{
	unsigned long nr_lazy;

	spin_lock(&vmap_area_lock);
	unlink_va(va, &vmap_area_root);
	spin_unlock(&vmap_area_lock);
	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
				PAGE_SHIFT, &vmap_lazy_nr);

	/*
	 * Merge or place it to the purge tree/list.
	 */
	spin_lock(&purge_vmap_area_lock);
	merge_or_add_vmap_area(va,
		&purge_vmap_area_root, &purge_vmap_area_list);
	spin_unlock(&purge_vmap_area_lock);

	/* After this point, we may free va at any time */
	if (unlikely(nr_lazy > lazy_max_pages()))
		try_purge_vmap_area_lazy();
}
/*
 * Free and unmap a vmap area
 */
static void free_unmap_vmap_area(struct vmap_area *va)
{
	flush_cache_vunmap(va->va_start, va->va_end);
	vunmap_range_noflush(va->va_start, va->va_end);
	if (debug_pagealloc_enabled_static())
		flush_tlb_kernel_range(va->va_start, va->va_end);
	free_vmap_area_noflush(va);
}

static struct vmap_area *find_vmap_area(unsigned long addr)
{
	struct vmap_area *va;

	spin_lock(&vmap_area_lock);
	va = __find_vmap_area(addr);
	spin_unlock(&vmap_area_lock);

	return va;
}
  /*** Per cpu kva allocator ***/
  
  /*
   * vmap space is limited especially on 32 bit architectures. Ensure there is
   * room for at least 16 percpu vmap blocks per CPU.
   */
  /*
   * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
   * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
   * instead (we just need a rough idea)
   */
  #if BITS_PER_LONG == 32
  #define VMALLOC_SPACE		(128UL*1024*1024)
  #else
  #define VMALLOC_SPACE		(128UL*1024*1024*1024)
  #endif
  
  #define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
  #define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
  #define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
  #define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
  #define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
  #define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
#define VMAP_BBMAP_BITS		\
		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
		VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
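
/*
 * Sizing example (illustrative, assuming 4KB pages, 64-bit and NR_CPUS == 64):
 * VMALLOC_PAGES = 128GB / 4KB = 32M pages, 32M / 64 / 16 = 32768, which gets
 * clamped to VMAP_BBMAP_BITS_MAX, so VMAP_BBMAP_BITS = 1024 and
 * VMAP_BLOCK_SIZE = 1024 * 4KB = 4MB per vmap block.
 */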
  
struct vmap_block_queue {
	spinlock_t lock;
	struct list_head free;
};

struct vmap_block {
	spinlock_t lock;
	struct vmap_area *va;
	unsigned long free, dirty;
	unsigned long dirty_min, dirty_max; /*< dirty range */
	struct list_head free_list;
	struct rcu_head rcu_head;
	struct list_head purge;
};

/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * XArray of vmap blocks, indexed by address, to quickly find a vmap block
 * in the free path. Could get rid of this if we change the API to return a
 * "cookie" from alloc, to be passed to free. But no big deal yet.
 */
static DEFINE_XARRAY(vmap_blocks);

/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks. However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it shouldn't be a
 * big problem.
 */

static unsigned long addr_to_vb_idx(unsigned long addr)
{
	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
	addr /= VMAP_BLOCK_SIZE;
	return addr;
}
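
/*
 * Translate a (block start, page offset) pair back into the virtual
 * address of a sub-allocation; the result must stay in the same block.
 */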
static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
{
	unsigned long addr;

	addr = va_start + (pages_off << PAGE_SHIFT);
	BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
	return (void *)addr;
}

/**
 * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
 *                  block. Of course the number of pages can't exceed VMAP_BBMAP_BITS
 * @order:    how many 2^order pages should be occupied in newly allocated block
 * @gfp_mask: flags for the page level allocator
 *
 * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
 */
static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
{
	struct vmap_block_queue *vbq;
	struct vmap_block *vb;
	struct vmap_area *va;
	unsigned long vb_idx;
	int node, err;
	void *vaddr;

	node = numa_node_id();

	vb = kmalloc_node(sizeof(struct vmap_block),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!vb))
		return ERR_PTR(-ENOMEM);

	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
					VMALLOC_START, VMALLOC_END,
					node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(vb);
		return ERR_CAST(va);
	}
	vaddr = vmap_block_vaddr(va->va_start, 0);
	spin_lock_init(&vb->lock);
	vb->va = va;
	/* At least something should be left free */
	BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
	vb->free = VMAP_BBMAP_BITS - (1UL << order);
	vb->dirty = 0;
	vb->dirty_min = VMAP_BBMAP_BITS;
	vb->dirty_max = 0;
	INIT_LIST_HEAD(&vb->free_list);

	vb_idx = addr_to_vb_idx(va->va_start);
	err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask);
	if (err) {
		kfree(vb);
		free_vmap_area(va);
		return ERR_PTR(err);
	}

	vbq = &get_cpu_var(vmap_block_queue);
	spin_lock(&vbq->lock);
	list_add_tail_rcu(&vb->free_list, &vbq->free);
	spin_unlock(&vbq->lock);
	put_cpu_var(vmap_block_queue);

	return vaddr;
}

static void free_vmap_block(struct vmap_block *vb)
{
	struct vmap_block *tmp;

	tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
	BUG_ON(tmp != vb);
	free_vmap_area_noflush(vb->va);
	kfree_rcu(vb, rcu_head);
}
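
/*
 * Walk this CPU's free list and pick out blocks with no outstanding
 * allocations (every bit is either free or dirty) that are not already
 * fully dirty, then release them outside of the RCU read section.
 */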
  static void purge_fragmented_blocks(int cpu)
  {
  	LIST_HEAD(purge);
  	struct vmap_block *vb;
  	struct vmap_block *n_vb;
  	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  
  	rcu_read_lock();
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  
  		if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
  			continue;
  
  		spin_lock(&vb->lock);
  		if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
  			vb->free = 0; /* prevent further allocs after releasing lock */
  			vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
  			vb->dirty_min = 0;
  			vb->dirty_max = VMAP_BBMAP_BITS;
  			spin_lock(&vbq->lock);
  			list_del_rcu(&vb->free_list);
  			spin_unlock(&vbq->lock);
  			spin_unlock(&vb->lock);
  			list_add_tail(&vb->purge, &purge);
  		} else
  			spin_unlock(&vb->lock);
  	}
  	rcu_read_unlock();
  
  	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
  		list_del(&vb->purge);
  		free_vmap_block(vb);
  	}
  }
  static void purge_fragmented_blocks_allcpus(void)
  {
  	int cpu;
  
  	for_each_possible_cpu(cpu)
  		purge_fragmented_blocks(cpu);
  }
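
/*
 * Allocate 2^order pages out of a per-CPU vmap block, falling back to
 * new_vmap_block() when no block on this CPU's free list has enough room.
 */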
  static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
	void *vaddr = NULL;
	unsigned int order;
	BUG_ON(offset_in_page(size));
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
	if (WARN_ON(size == 0)) {
		/*
		 * Allocating 0 bytes isn't what caller wants since
		 * get_order(0) returns funny result. Just warn and terminate
		 * early.
		 */
		return NULL;
	}
	order = get_order(size);
	rcu_read_lock();
	vbq = &get_cpu_var(vmap_block_queue);
	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
		unsigned long pages_off;

		spin_lock(&vb->lock);
		if (vb->free < (1UL << order)) {
			spin_unlock(&vb->lock);
			continue;
		}

		pages_off = VMAP_BBMAP_BITS - vb->free;
		vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
		vb->free -= 1UL << order;
		if (vb->free == 0) {
			spin_lock(&vbq->lock);
			list_del_rcu(&vb->free_list);
			spin_unlock(&vbq->lock);
		}

		spin_unlock(&vb->lock);
		break;
	}

	put_cpu_var(vmap_block_queue);
	rcu_read_unlock();
	/* Allocate new block if nothing was found */
	if (!vaddr)
		vaddr = new_vmap_block(order, gfp_mask);

	return vaddr;
}
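
/*
 * Return a sub-allocation to its vmap block: unmap it, widen the block's
 * dirty range, and free the whole block once every bit in it is dirty.
 */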
static void vb_free(unsigned long addr, unsigned long size)
{
	unsigned long offset;
	unsigned int order;
	struct vmap_block *vb;
	BUG_ON(offset_in_page(size));
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);

	flush_cache_vunmap(addr, addr + size);

	order = get_order(size);
	offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
	vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));

	vunmap_range_noflush(addr, addr + size);

	if (debug_pagealloc_enabled_static())
		flush_tlb_kernel_range(addr, addr + size);

	spin_lock(&vb->lock);

	/* Expand dirty range */
	vb->dirty_min = min(vb->dirty_min, offset);
	vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));

	vb->dirty += 1UL << order;
	if (vb->dirty == VMAP_BBMAP_BITS) {
		BUG_ON(vb->free);
		spin_unlock(&vb->lock);
		free_vmap_block(vb);
	} else
		spin_unlock(&vb->lock);
}
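
/*
 * Walk all per-CPU vmap blocks, widen [start:end] to cover their dirty
 * ranges, then purge the lazily freed areas and flush the TLB if needed.
 */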
static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
{
	int cpu;

	if (unlikely(!vmap_initialized))
		return;
	might_sleep();
	for_each_possible_cpu(cpu) {
		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
		struct vmap_block *vb;

		rcu_read_lock();
		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
			spin_lock(&vb->lock);
			if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) {
				unsigned long va_start = vb->va->va_start;
				unsigned long s, e;

				s = va_start + (vb->dirty_min << PAGE_SHIFT);
				e = va_start + (vb->dirty_max << PAGE_SHIFT);

				start = min(s, start);
				end   = max(e, end);

				flush = 1;
			}
			spin_unlock(&vb->lock);
		}
		rcu_read_unlock();
	}
	mutex_lock(&vmap_purge_lock);
	purge_fragmented_blocks_allcpus();
	if (!__purge_vmap_area_lazy(start, end) && flush)
		flush_tlb_kernel_range(start, end);
	mutex_unlock(&vmap_purge_lock);
}
  
  /**
   * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
   *
   * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
   * to amortize TLB flushing overheads. What this means is that any page you
   * have now, may, in a former life, have been mapped into kernel virtual
   * address by the vmap layer and so there might be some CPUs with TLB entries
   * still referencing that page (additional to the regular 1:1 kernel mapping).
   *
   * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
   * be sure that none of the pages we have control over will have any aliases
   * from the vmap layer.
   */
  void vm_unmap_aliases(void)
  {
  	unsigned long start = ULONG_MAX, end = 0;
  	int flush = 0;
  
  	_vm_unmap_aliases(start, end, flush);
  }
  EXPORT_SYMBOL_GPL(vm_unmap_aliases);
  
  /**
   * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
   * @mem: the pointer returned by vm_map_ram
   * @count: the count passed to that vm_map_ram call (cannot unmap partial)
   */
  void vm_unmap_ram(const void *mem, unsigned int count)
  {
	unsigned long size = (unsigned long)count << PAGE_SHIFT;
	unsigned long addr = (unsigned long)mem;
	struct vmap_area *va;

	might_sleep();
	BUG_ON(!addr);
	BUG_ON(addr < VMALLOC_START);
	BUG_ON(addr > VMALLOC_END);
	BUG_ON(!PAGE_ALIGNED(addr));

	kasan_poison_vmalloc(mem, size);
	if (likely(count <= VMAP_MAX_ALLOC)) {
		debug_check_no_locks_freed(mem, size);
		vb_free(addr, size);
		return;
	}

	va = find_vmap_area(addr);
	BUG_ON(!va);
	debug_check_no_locks_freed((void *)va->va_start,
				    (va->va_end - va->va_start));
	free_unmap_vmap_area(va);
  }
  EXPORT_SYMBOL(vm_unmap_ram);
  
  /**
   * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
   * @pages: an array of pointers to the pages to be mapped
   * @count: number of pages
   * @node: prefer to allocate data structures on this node
 *
 * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
 * faster than vmap so it's good.  But if you mix long-life and short-life
 * objects with vm_map_ram(), it could consume lots of address space through
 * fragmentation (especially on a 32bit machine).  You could see failures in
 * the end.  Please use this function for short-lived objects.
 *
 * Returns: a pointer to the address that has been mapped, or %NULL on failure
 */
void *vm_map_ram(struct page **pages, unsigned int count, int node)
{
	unsigned long size = (unsigned long)count << PAGE_SHIFT;
	unsigned long addr;
	void *mem;

	if (likely(count <= VMAP_MAX_ALLOC)) {
		mem = vb_alloc(size, GFP_KERNEL);
		if (IS_ERR(mem))
			return NULL;
		addr = (unsigned long)mem;
	} else {
		struct vmap_area *va;
		va = alloc_vmap_area(size, PAGE_SIZE,
				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
		if (IS_ERR(va))
			return NULL;

		addr = va->va_start;
		mem = (void *)addr;
	}

	kasan_unpoison_vmalloc(mem, size);
	if (vmap_pages_range(addr, addr + size, PAGE_KERNEL,
				pages, PAGE_SHIFT) < 0) {
		vm_unmap_ram(mem, count);
		return NULL;
	}

	return mem;
}
EXPORT_SYMBOL(vm_map_ram);
  static struct vm_struct *vmlist __initdata;

  static inline unsigned int vm_area_page_order(struct vm_struct *vm)
  {
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
  	return vm->page_order;
  #else
  	return 0;
  #endif
  }
  
  static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order)
  {
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
  	vm->page_order = order;
  #else
  	BUG_ON(order != 0);
  #endif
  }
  /**
   * vm_area_add_early - add vmap area early during boot
   * @vm: vm_struct to add
   *
   * This function is used to add fixed kernel vm area to vmlist before
   * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags
   * should contain proper values and the other fields should be zero.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
  void __init vm_area_add_early(struct vm_struct *vm)
  {
  	struct vm_struct *tmp, **p;
  
  	BUG_ON(vmap_initialized);
  	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
  		if (tmp->addr >= vm->addr) {
  			BUG_ON(tmp->addr < vm->addr + vm->size);
  			break;
  		} else
  			BUG_ON(tmp->addr + tmp->size > vm->addr);
  	}
  	vm->next = *p;
  	*p = vm;
  }
  
  /**
   * vm_area_register_early - register vmap area early during boot
   * @vm: vm_struct to register
 * @align: requested alignment
 *
 * This function is used to register kernel vm area before
 * vmalloc_init() is called.  @vm->size and @vm->flags should contain
 * proper values on entry and other fields should be zero.  On return,
 * vm->addr contains the allocated address.
 *
 * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
 */
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
{
	static size_t vm_init_off __initdata;
	unsigned long addr;

	addr = ALIGN(VMALLOC_START + vm_init_off, align);
	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;

	vm->addr = (void *)addr;

	vm_area_add_early(vm);
}
  static void vmap_init_free_space(void)
  {
  	unsigned long vmap_start = 1;
  	const unsigned long vmap_end = ULONG_MAX;
  	struct vmap_area *busy, *free;
  
  	/*
  	 *     B     F     B     B     B     F
  	 * -|-----|.....|-----|-----|-----|.....|-
  	 *  |           The KVA space           |
  	 *  |<--------------------------------->|
  	 */
  	list_for_each_entry(busy, &vmap_area_list, list) {
  		if (busy->va_start - vmap_start > 0) {
  			free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!WARN_ON_ONCE(!free)) {
  				free->va_start = vmap_start;
  				free->va_end = busy->va_start;
  
  				insert_vmap_area_augment(free, NULL,
  					&free_vmap_area_root,
  						&free_vmap_area_list);
  			}
  		}
  
  		vmap_start = busy->va_end;
  	}
  
  	if (vmap_end - vmap_start > 0) {
  		free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (!WARN_ON_ONCE(!free)) {
  			free->va_start = vmap_start;
  			free->va_end = vmap_end;
  
  			insert_vmap_area_augment(free, NULL,
  				&free_vmap_area_root,
  					&free_vmap_area_list);
  		}
  	}
  }
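
/*
 * Set up the vmap_area cache, the per-CPU block queues and deferred-free
 * lists, import the early vmlist registrations and build the free-space
 * tree. Called once during early boot.
 */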
void __init vmalloc_init(void)
{
	struct vmap_area *va;
	struct vm_struct *tmp;
	int i;
	/*
	 * Create the cache for vmap_area objects.
	 */
	vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
	for_each_possible_cpu(i) {
		struct vmap_block_queue *vbq;
		struct vfree_deferred *p;

		vbq = &per_cpu(vmap_block_queue, i);
		spin_lock_init(&vbq->lock);
		INIT_LIST_HEAD(&vbq->free);
		p = &per_cpu(vfree_deferred, i);
		init_llist_head(&p->list);
		INIT_WORK(&p->wq, free_work);
	}

	/* Import existing vmlist entries. */
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
		if (WARN_ON_ONCE(!va))
			continue;
		va->va_start = (unsigned long)tmp->addr;
		va->va_end = va->va_start + tmp->size;
		va->vm = tmp;
		insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
	}

	/*
	 * Now we can initialize a free vmap space.
	 */
	vmap_init_free_space();
	vmap_initialized = true;
}
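
/* Bind a vm_struct to its vmap_area; the caller must hold vmap_area_lock. */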
static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
	struct vmap_area *va, unsigned long flags, const void *caller)
{
	vm->flags = flags;
	vm->addr = (void *)va->va_start;
	vm->size = va->va_end - va->va_start;
	vm->caller = caller;
	va->vm = vm;
}

static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, const void *caller)
{
	spin_lock(&vmap_area_lock);
	setup_vmalloc_vm_locked(vm, va, flags, caller);
	spin_unlock(&vmap_area_lock);
}

static void clear_vm_uninitialized_flag(struct vm_struct *vm)
{
	/*
	 * Before removing VM_UNINITIALIZED,
	 * we should make sure that vm has proper values.
	 * Pair with smp_rmb() in show_numa_info().
	 */
	smp_wmb();
	vm->flags &= ~VM_UNINITIALIZED;
}
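
/*
 * Common helper behind get_vm_area() and friends: allocate a vm_struct,
 * reserve kernel virtual address space in [start:end] (plus a guard page
 * unless VM_NO_GUARD is set) and bind the two together.
 */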
static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long shift, unsigned long flags,
		unsigned long start, unsigned long end, int node,
		gfp_t gfp_mask, const void *caller)
{
	struct vmap_area *va;
	struct vm_struct *area;
	unsigned long requested_size = size;

	BUG_ON(in_interrupt());
	size = ALIGN(size, 1ul << shift);
	if (unlikely(!size))
		return NULL;

	if (flags & VM_IOREMAP)
		align = 1ul << clamp_t(int, get_count_order_long(size),
				       PAGE_SHIFT, IOREMAP_MAX_ORDER);
	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;
	if (!(flags & VM_NO_GUARD))
		size += PAGE_SIZE;

	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	kasan_unpoison_vmalloc((void *)va->va_start, requested_size);

	setup_vmalloc_vm(area, va, flags, caller);

	return area;
}

struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
				       unsigned long start, unsigned long end,
				       const void *caller)
{
	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
				  NUMA_NO_NODE, GFP_KERNEL, caller);
}
EXPORT_SYMBOL_GPL(__get_vm_area_caller);

  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2351
2352
2353
   * get_vm_area - reserve a contiguous kernel virtual area
   * @size:	 size of the area
   * @flags:	 %VM_IOREMAP for I/O mappings or VM_ALLOC
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2354
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2355
2356
2357
   * Search an area of @size in the kernel virtual mapping area,
   * and reserved it for out purposes.  Returns the area descriptor
   * on success or %NULL on failure.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2358
2359
   *
   * Return: the area descriptor on success or %NULL on failure.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2360
2361
2362
   */
  struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
  {
7ca3027b7   Daniel Axtens   mm/vmalloc: unbre...
2363
2364
  	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
  				  VMALLOC_START, VMALLOC_END,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2365
2366
  				  NUMA_NO_NODE, GFP_KERNEL,
  				  __builtin_return_address(0));
230169693   Christoph Lameter   vmallocinfo: add ...
2367
2368
2369
  }
  
  struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
2370
  				const void *caller)
230169693   Christoph Lameter   vmallocinfo: add ...
2371
  {
7ca3027b7   Daniel Axtens   mm/vmalloc: unbre...
2372
2373
  	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
  				  VMALLOC_START, VMALLOC_END,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2374
  				  NUMA_NO_NODE, GFP_KERNEL, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2375
  }
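  
  /*
   * Example (illustrative sketch, not part of this file): reserving a chunk
   * of kernel virtual address space with get_vm_area() and releasing it with
   * free_vm_area(), as a caller including <linux/vmalloc.h> might do.  The
   * function name example_reserve_kva() is hypothetical.
   */
  static int example_reserve_kva(void)
  {
  	struct vm_struct *area;
  
  	/* Reserve PAGE_SIZE bytes of kernel virtual address space. */
  	area = get_vm_area(PAGE_SIZE, VM_IOREMAP);
  	if (!area)
  		return -ENOMEM;
  
  	pr_info("reserved KVA at %p\n", area->addr);
  
  	/* Drop the reservation again. */
  	free_vm_area(area);
  	return 0;
  }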
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2376
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2377
2378
   * find_vm_area - find a contiguous kernel virtual area
   * @addr:	  base address
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2379
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2380
2381
2382
   * Search for the kernel VM area starting at @addr, and return it.
   * It is up to the caller to do all required locking to keep the returned
   * pointer valid.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2383
   *
74640617e   Hui Su   mm/vmalloc.c: fix...
2384
   * Return: the area descriptor on success or %NULL on failure.
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2385
2386
   */
  struct vm_struct *find_vm_area(const void *addr)
833423143   Nick Piggin   [PATCH] mm: intro...
2387
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
2388
  	struct vmap_area *va;
833423143   Nick Piggin   [PATCH] mm: intro...
2389

db64fe022   Nick Piggin   mm: rewrite vmap ...
2390
  	va = find_vmap_area((unsigned long)addr);
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2391
2392
  	if (!va)
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2393

688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2394
  	return va->vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2395
  }
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2396
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2397
2398
   * remove_vm_area - find and remove a contiguous kernel virtual area
   * @addr:	    base address
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2399
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2400
2401
2402
   * Search for the kernel VM area starting at @addr, and remove it.
   * This function returns the found VM area, but using it is NOT safe
   * on SMP machines, except for its size or flags.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2403
   *
74640617e   Hui Su   mm/vmalloc.c: fix...
2404
   * Return: the area descriptor on success or %NULL on failure.
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2405
   */
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2406
  struct vm_struct *remove_vm_area(const void *addr)
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2407
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
2408
  	struct vmap_area *va;
5803ed292   Christoph Hellwig   mm: mark all call...
2409
  	might_sleep();
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
2410
2411
  	spin_lock(&vmap_area_lock);
  	va = __find_vmap_area((unsigned long)addr);
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2412
  	if (va && va->vm) {
db1aecafe   Minchan Kim   mm/vmalloc.c: cha...
2413
  		struct vm_struct *vm = va->vm;
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2414

c69480ade   Joonsoo Kim   mm, vmalloc: prot...
2415
  		va->vm = NULL;
c69480ade   Joonsoo Kim   mm, vmalloc: prot...
2416
  		spin_unlock(&vmap_area_lock);
a5af5aa8b   Andrey Ryabinin   kasan, module, vm...
2417
  		kasan_free_shadow(vm);
dd32c2799   KAMEZAWA Hiroyuki   vmalloc: unmap vm...
2418
  		free_unmap_vmap_area(va);
dd32c2799   KAMEZAWA Hiroyuki   vmalloc: unmap vm...
2419

db64fe022   Nick Piggin   mm: rewrite vmap ...
2420
2421
  		return vm;
  	}
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
2422
2423
  
  	spin_unlock(&vmap_area_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
2424
  	return NULL;
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2425
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2426
2427
2428
2429
  static inline void set_area_direct_map(const struct vm_struct *area,
  				       int (*set_direct_map)(struct page *page))
  {
  	int i;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2430
  	/* HUGE_VMALLOC passes small pages to set_direct_map */
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2431
2432
2433
2434
2435
2436
2437
2438
  	for (i = 0; i < area->nr_pages; i++)
  		if (page_address(area->pages[i]))
  			set_direct_map(area->pages[i]);
  }
  
  /* Handle removing and resetting vm mappings related to the vm_struct. */
  static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
  {
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2439
  	unsigned long start = ULONG_MAX, end = 0;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2440
  	unsigned int page_order = vm_area_page_order(area);
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2441
  	int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2442
  	int flush_dmap = 0;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2443
  	int i;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
  	remove_vm_area(area->addr);
  
  	/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
  	if (!flush_reset)
  		return;
  
  	/*
  	 * If not deallocating pages, just do the flush of the VM area and
  	 * return.
  	 */
  	if (!deallocate_pages) {
  		vm_unmap_aliases();
  		return;
  	}
  
  	/*
  	 * If execution gets here, flush the vm mapping and reset the direct
  	 * map. Find the start and end range of the direct mappings to make sure
  	 * the vm_unmap_aliases() flush includes the direct map.
  	 */
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2464
  	for (i = 0; i < area->nr_pages; i += 1U << page_order) {
8e41f8726   Rick Edgecombe   mm/vmalloc: Fix c...
2465
2466
  		unsigned long addr = (unsigned long)page_address(area->pages[i]);
  		if (addr) {
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2467
2468
2469
  			unsigned long page_size;
  
  			page_size = PAGE_SIZE << page_order;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2470
  			start = min(addr, start);
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2471
  			end = max(addr + page_size, end);
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2472
  			flush_dmap = 1;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2473
2474
2475
2476
2477
2478
2479
2480
2481
  		}
  	}
  
  	/*
  	 * Set direct map to something invalid so that it won't be cached if
  	 * there are any accesses after the TLB flush, then flush the TLB and
  	 * reset the direct map permissions to the default.
  	 */
  	set_area_direct_map(area, set_direct_map_invalid_noflush);
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2482
  	_vm_unmap_aliases(start, end, flush_dmap);
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2483
2484
  	set_area_direct_map(area, set_direct_map_default_noflush);
  }
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2485
  static void __vunmap(const void *addr, int deallocate_pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2486
2487
2488
2489
2490
  {
  	struct vm_struct *area;
  
  	if (!addr)
  		return;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2491
2492
  	if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
ab15d9b4c   Dan Carpenter   mm/vmalloc.c: unb...
2493
  			addr))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2494
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2495

6ade20327   Liviu Dudau   mm/vmalloc.c: don...
2496
  	area = find_vm_area(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2497
  	if (unlikely(!area)) {
4c8573e25   Arjan van de Ven   Use WARN() in mm/...
2498
2499
  		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2500
  				addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2501
2502
  		return;
  	}
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
2503
2504
  	debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
  	debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
9a11b49a8   Ingo Molnar   [PATCH] lockdep: ...
2505

c041098c6   Vincenzo Frascino   mm/vmalloc.c: fix...
2506
  	kasan_poison_vmalloc(area->addr, get_vm_area_size(area));
3c5c3cfb9   Daniel Axtens   kasan: support ba...
2507

868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2508
  	vm_remove_mappings(area, deallocate_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2509
  	if (deallocate_pages) {
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2510
  		unsigned int page_order = vm_area_page_order(area);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2511
  		int i;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2512
  		for (i = 0; i < area->nr_pages; i += 1U << page_order) {
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2513
2514
2515
  			struct page *page = area->pages[i];
  
  			BUG_ON(!page);
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2516
  			__free_pages(page, page_order);
a850e932d   Rafael Aquini   mm: vmalloc: add ...
2517
  			cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2518
  		}
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2519
  		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2520

244d63ee3   David Rientjes   mm, vmalloc: remo...
2521
  		kvfree(area->pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2522
2523
2524
  	}
  
  	kfree(area);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2525
  }
bf22e37a6   Andrey Ryabinin   mm: add vfree_ato...
2526
2527
2528
2529
2530
2531
2532
  
  static inline void __vfree_deferred(const void *addr)
  {
  	/*
  	 * Use raw_cpu_ptr() because this can be called from preemptible
  	 * context. Preemption is absolutely fine here, because the llist_add()
  	 * implementation is lockless, so it works even if we are adding to
73221d888   Jeongtae Park   mm/vmalloc: fix a...
2533
  	 * another cpu's list. schedule_work() should be fine with this too.
bf22e37a6   Andrey Ryabinin   mm: add vfree_ato...
2534
2535
2536
2537
2538
2539
2540
2541
  	 */
  	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
  
  	if (llist_add((struct llist_node *)addr, &p->list))
  		schedule_work(&p->wq);
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2542
2543
   * vfree_atomic - release memory allocated by vmalloc()
   * @addr:	  memory base address
bf22e37a6   Andrey Ryabinin   mm: add vfree_ato...
2544
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2545
2546
   * This one is just like vfree() but can be called in any atomic context
   * except NMIs.
bf22e37a6   Andrey Ryabinin   mm: add vfree_ato...
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
   */
  void vfree_atomic(const void *addr)
  {
  	BUG_ON(in_nmi());
  
  	kmemleak_free(addr);
  
  	if (!addr)
  		return;
  	__vfree_deferred(addr);
  }
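  
  /*
   * Example (illustrative sketch, not part of this file): freeing a vmalloc'ed
   * buffer from a context that must not sleep, e.g. while holding a spinlock.
   * The name example_free_under_lock() and its parameters are hypothetical.
   */
  static void example_free_under_lock(spinlock_t *lock, void *buf)
  {
  	spin_lock(lock);
  	/* vfree() may sleep; vfree_atomic() defers the actual work instead. */
  	vfree_atomic(buf);
  	spin_unlock(lock);
  }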
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2558
2559
2560
2561
2562
2563
2564
  static void __vfree(const void *addr)
  {
  	if (unlikely(in_interrupt()))
  		__vfree_deferred(addr);
  	else
  		__vunmap(addr, 1);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2565
  /**
fa307474c   Matthew Wilcox (Oracle)   mm: update the do...
2566
2567
   * vfree - Release memory allocated by vmalloc()
   * @addr:  Memory base address
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2568
   *
fa307474c   Matthew Wilcox (Oracle)   mm: update the do...
2569
2570
2571
2572
   * Free the virtually contiguous memory area starting at @addr, as obtained
   * from one of the vmalloc() family of APIs.  This will usually also free the
   * physical memory underlying the virtual allocation, but that memory is
   * reference counted, so it will not be freed until the last user goes away.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2573
   *
fa307474c   Matthew Wilcox (Oracle)   mm: update the do...
2574
   * If @addr is NULL, no operation is performed.
c9fcee513   Andrew Morton   mm/vmalloc.c: add...
2575
   *
fa307474c   Matthew Wilcox (Oracle)   mm: update the do...
2576
   * Context:
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2577
   * May sleep if called *not* from interrupt context.
fa307474c   Matthew Wilcox (Oracle)   mm: update the do...
2578
2579
   * Must not be called in NMI context (strictly speaking, it could be
   * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
f0953a1bb   Ingo Molnar   mm: fix typos in ...
2580
   * conventions for vfree() arch-dependent would be a really bad idea).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2581
   */
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2582
  void vfree(const void *addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2583
  {
32fcfd407   Al Viro   make vfree() safe...
2584
  	BUG_ON(in_nmi());
89219d37a   Catalin Marinas   kmemleak: Add the...
2585
2586
  
  	kmemleak_free(addr);
a8dda165e   Andrey Ryabinin   vfree: add debug ...
2587
  	might_sleep_if(!in_interrupt());
32fcfd407   Al Viro   make vfree() safe...
2588
2589
  	if (!addr)
  		return;
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2590
2591
  
  	__vfree(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2592
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2593
2594
2595
  EXPORT_SYMBOL(vfree);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2596
2597
   * vunmap - release virtual mapping obtained by vmap()
   * @addr:   memory base address
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2598
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2599
2600
   * Free the virtually contiguous memory area starting at @addr,
   * which was created from the page array passed to vmap().
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2601
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2602
   * Must not be called in interrupt context.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2603
   */
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2604
  void vunmap(const void *addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2605
2606
  {
  	BUG_ON(in_interrupt());
34754b69a   Peter Zijlstra   x86: make vmap ye...
2607
  	might_sleep();
32fcfd407   Al Viro   make vfree() safe...
2608
2609
  	if (addr)
  		__vunmap(addr, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2610
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2611
2612
2613
  EXPORT_SYMBOL(vunmap);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2614
2615
2616
2617
2618
2619
   * vmap - map an array of pages into virtually contiguous space
   * @pages: array of page pointers
   * @count: number of pages to map
   * @flags: vm_area->flags
   * @prot: page protection for the mapping
   *
b944afc9d   Christoph Hellwig   mm: add a VM_MAP_...
2620
2621
2622
2623
2624
   * Maps @count pages from @pages into contiguous kernel virtual space.
   * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
   * (which must be kmalloc or vmalloc memory) and one reference per page in it
   * are transferred from the caller to vmap(), and will be freed / dropped when
   * vfree() is called on the return value.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2625
2626
   *
   * Return: the address of the area or %NULL on failure
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2627
2628
   */
  void *vmap(struct page **pages, unsigned int count,
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2629
  	   unsigned long flags, pgprot_t prot)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2630
2631
  {
  	struct vm_struct *area;
b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
2632
  	unsigned long addr;
65ee03c4b   Guillermo Julián Moreno   mm: fix overflow ...
2633
  	unsigned long size;		/* In bytes */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2634

34754b69a   Peter Zijlstra   x86: make vmap ye...
2635
  	might_sleep();
ca79b0c21   Arun KS   mm: convert total...
2636
  	if (count > totalram_pages())
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2637
  		return NULL;
65ee03c4b   Guillermo Julián Moreno   mm: fix overflow ...
2638
2639
  	size = (unsigned long)count << PAGE_SHIFT;
  	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2640
2641
  	if (!area)
  		return NULL;
230169693   Christoph Lameter   vmallocinfo: add ...
2642

b67177ecd   Nicholas Piggin   mm/vmalloc: remov...
2643
2644
2645
  	addr = (unsigned long)area->addr;
  	if (vmap_pages_range(addr, addr + size, pgprot_nx(prot),
  				pages, PAGE_SHIFT) < 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2646
2647
2648
  		vunmap(area->addr);
  		return NULL;
  	}
c22ee5284   Miaohe Lin   mm/vmalloc.c: fix...
2649
  	if (flags & VM_MAP_PUT_PAGES) {
b944afc9d   Christoph Hellwig   mm: add a VM_MAP_...
2650
  		area->pages = pages;
c22ee5284   Miaohe Lin   mm/vmalloc.c: fix...
2651
2652
  		area->nr_pages = count;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2653
2654
  	return area->addr;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2655
  EXPORT_SYMBOL(vmap);
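  
  /*
   * Example (illustrative sketch, not part of this file): mapping two
   * independently allocated pages into one virtually contiguous region with
   * vmap().  The name example_vmap_two_pages() is hypothetical; the caller is
   * expected to vunmap() the returned address and free both pages when done.
   */
  static void *example_vmap_two_pages(struct page *pages[2])
  {
  	void *vaddr;
  
  	pages[0] = alloc_page(GFP_KERNEL);
  	pages[1] = alloc_page(GFP_KERNEL);
  	if (!pages[0] || !pages[1])
  		goto err;
  
  	/* Without VM_MAP_PUT_PAGES the pages stay owned by the caller. */
  	vaddr = vmap(pages, 2, VM_MAP, PAGE_KERNEL);
  	if (!vaddr)
  		goto err;
  
  	return vaddr;
  err:
  	if (pages[0])
  		__free_page(pages[0]);
  	if (pages[1])
  		__free_page(pages[1]);
  	return NULL;
  }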
3e9a9e256   Christoph Hellwig   mm: add a vmap_pf...
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
  #ifdef CONFIG_VMAP_PFN
  struct vmap_pfn_data {
  	unsigned long	*pfns;
  	pgprot_t	prot;
  	unsigned int	idx;
  };
  
  static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
  {
  	struct vmap_pfn_data *data = private;
  
  	if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
  		return -EINVAL;
  	*pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
  	return 0;
  }
  
  /**
   * vmap_pfn - map an array of PFNs into virtually contiguous space
   * @pfns: array of PFNs
   * @count: number of pages to map
   * @prot: page protection for the mapping
   *
   * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns
   * the start address of the mapping.
   */
  void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
  {
  	struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
  	struct vm_struct *area;
  
  	area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
  			__builtin_return_address(0));
  	if (!area)
  		return NULL;
  	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
  			count * PAGE_SIZE, vmap_pfn_apply, &data)) {
  		free_vm_area(area);
  		return NULL;
  	}
  	return area->addr;
  }
  EXPORT_SYMBOL_GPL(vmap_pfn);
  #endif /* CONFIG_VMAP_PFN */
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2700
2701
  static inline unsigned int
  vm_area_alloc_pages(gfp_t gfp, int nid,
343ab8178   Uladzislau Rezki (Sony)   mm/vmalloc: use b...
2702
  		unsigned int order, unsigned int nr_pages, struct page **pages)
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2703
2704
  {
  	unsigned int nr_allocated = 0;
ffb29b1c2   Chen Wandun   mm/vmalloc: fix n...
2705
2706
  	struct page *page;
  	int i;
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2707
2708
2709
2710
2711
2712
2713
  
  	/*
  	 * For order-0 pages we make use of the bulk allocator. If the
  	 * page array ends up only partly populated (or not populated at
  	 * all) because of allocation failures, fall back to the single
  	 * page allocator, which is more permissive.
  	 */
ffb29b1c2   Chen Wandun   mm/vmalloc: fix n...
2714
  	if (!order && nid != NUMA_NO_NODE) {
343ab8178   Uladzislau Rezki (Sony)   mm/vmalloc: use b...
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
  		while (nr_allocated < nr_pages) {
  			unsigned int nr, nr_pages_request;
  
  			/*
  			 * The maximum allowed request is hard-coded to 100
  			 * pages per call, in order to prevent long
  			 * preemption-off periods in the bulk allocator,
  			 * so the request range is [1:100].
  			 */
  			nr_pages_request = min(100U, nr_pages - nr_allocated);
  
  			nr = alloc_pages_bulk_array_node(gfp, nid,
  				nr_pages_request, pages + nr_allocated);
  
  			nr_allocated += nr;
  			cond_resched();
  
  			/*
  			 * If no pages were obtained, or the request was
  			 * only partly satisfied, fall back to the single
  			 * page allocator.
  			 */
  			if (nr != nr_pages_request)
  				break;
  		}
ffb29b1c2   Chen Wandun   mm/vmalloc: fix n...
2739
  	} else if (order)
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2740
2741
2742
2743
2744
2745
2746
  		/*
  		 * Compound pages are required for remap_vmalloc_page
  		 * when using high-order pages.
  		 */
  		gfp |= __GFP_COMP;
  
  	/* High-order pages or fallback path if "bulk" fails. */
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2747

ffb29b1c2   Chen Wandun   mm/vmalloc: fix n...
2748
2749
2750
2751
2752
  	while (nr_allocated < nr_pages) {
  		if (nid == NUMA_NO_NODE)
  			page = alloc_pages(gfp, order);
  		else
  			page = alloc_pages_node(nid, gfp, order);
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
  		if (unlikely(!page))
  			break;
  
  		/*
  		 * Careful, we allocate and map page-order pages, but
  		 * tracking is done per PAGE_SIZE page so as to keep the
  		 * vm_struct APIs independent of the physical/mapped size.
  		 */
  		for (i = 0; i < (1U << order); i++)
  			pages[nr_allocated + i] = page + i;
12e376a6f   Uladzislau Rezki (Sony)   mm/vmalloc: remov...
2763
  		cond_resched();
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2764
2765
2766
2767
2768
  		nr_allocated += 1U << order;
  	}
  
  	return nr_allocated;
  }
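  
  /*
   * Example (illustrative sketch, not part of this file): the same
   * bulk-then-fallback allocation pattern as above, in isolation.  The helper
   * name example_bulk_fill() is hypothetical; @pages must be zero-initialized
   * because the bulk allocator only fills NULL slots.
   */
  static unsigned int example_bulk_fill(gfp_t gfp, int nid,
  				      unsigned int nr, struct page **pages)
  {
  	unsigned int got;
  
  	/* Try to fill the whole array with one bulk call... */
  	got = alloc_pages_bulk_array_node(gfp, nid, nr, pages);
  
  	/* ...then top it up page by page if the bulk call came up short. */
  	while (got < nr) {
  		struct page *page = alloc_pages_node(nid, gfp, 0);
  
  		if (!page)
  			break;
  		pages[got++] = page;
  	}
  
  	return got;
  }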
e31d9eb5c   Adrian Bunk   make __vmalloc_ar...
2769
  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2770
2771
  				 pgprot_t prot, unsigned int page_shift,
  				 int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2772
  {
930f036b4   David Rientjes   mm, vmalloc: cons...
2773
  	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2774
2775
  	unsigned long addr = (unsigned long)area->addr;
  	unsigned long size = get_vm_area_size(area);
34fe65371   Andrew Morton   mm/vmalloc.c:__vm...
2776
  	unsigned long array_size;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2777
2778
  	unsigned int nr_small_pages = size >> PAGE_SHIFT;
  	unsigned int page_order;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2779

121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2780
  	array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
f255935b9   Christoph Hellwig   mm: cleanup the g...
2781
2782
2783
  	gfp_mask |= __GFP_NOWARN;
  	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
  		gfp_mask |= __GFP_HIGHMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2784

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2785
  	/* Please note that the recursion is strictly bounded. */
8757d5fa6   Jan Kiszka   [PATCH] mm: fix o...
2786
  	if (array_size > PAGE_SIZE) {
5c1f4e690   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
2787
  		area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
f255935b9   Christoph Hellwig   mm: cleanup the g...
2788
  					area->caller);
286e1ea3a   Andrew Morton   [PATCH] vmalloc()...
2789
  	} else {
5c1f4e690   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
2790
  		area->pages = kmalloc_node(array_size, nested_gfp, node);
286e1ea3a   Andrew Morton   [PATCH] vmalloc()...
2791
  	}
7ea362427   Austin Kim   mm/vmalloc.c: mov...
2792

5c1f4e690   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
2793
  	if (!area->pages) {
d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2794
  		warn_alloc(gfp_mask, NULL,
f4bdfeaf1   Uladzislau Rezki (Sony)   mm/vmalloc: remov...
2795
2796
  			"vmalloc error: size %lu, failed to allocate page array size %lu",
  			nr_small_pages * PAGE_SIZE, array_size);
cd61413ba   Uladzislau Rezki (Sony)   mm/vmalloc: print...
2797
  		free_vm_area(area);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2798
2799
  		return NULL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2800

121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2801
  	set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2802
  	page_order = vm_area_page_order(area);
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2803

12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2804
2805
  	area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
  		page_order, nr_small_pages, area->pages);
5c1f4e690   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
2806

97105f0ab   Roman Gushchin   mm: vmalloc: show...
2807
  	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2808

5c1f4e690   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
2809
2810
2811
2812
2813
2814
  	/*
  	 * If not enough pages were obtained to satisfy the allocation
  	 * request, free whatever was allocated (if anything) via __vfree().
  	 */
  	if (area->nr_pages != nr_small_pages) {
  		warn_alloc(gfp_mask, NULL,
f4bdfeaf1   Uladzislau Rezki (Sony)   mm/vmalloc: remov...
2815
  			"vmalloc error: size %lu, page order %u, failed to allocate pages",
5c1f4e690   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
2816
2817
2818
  			area->nr_pages * PAGE_SIZE, page_order);
  		goto fail;
  	}
12b9f873a   Uladzislau Rezki   mm/vmalloc: fallb...
2819
2820
  	if (vmap_pages_range(addr, addr + size, prot, area->pages,
  			page_shift) < 0) {
d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2821
  		warn_alloc(gfp_mask, NULL,
f4bdfeaf1   Uladzislau Rezki (Sony)   mm/vmalloc: remov...
2822
2823
  			"vmalloc error: size %lu, failed to map pages",
  			area->nr_pages * PAGE_SIZE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2824
  		goto fail;
d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2825
  	}
ed1f324c5   Christoph Hellwig   mm: remove map_vm...
2826

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2827
2828
2829
  	return area->addr;
  
  fail:
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2830
  	__vfree(area->addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2831
2832
2833
2834
  	return NULL;
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
   * __vmalloc_node_range - allocate virtually contiguous memory
   * @size:		  allocation size
   * @align:		  desired alignment
   * @start:		  vm area range start
   * @end:		  vm area range end
   * @gfp_mask:		  flags for the page level allocator
   * @prot:		  protection mask for the allocated pages
   * @vm_flags:		  additional vm area flags (e.g. %VM_NO_GUARD)
   * @node:		  node to use for allocation or NUMA_NO_NODE
   * @caller:		  caller's return address
   *
   * Allocate enough pages to cover @size from the page level
   * allocator with @gfp_mask flags.  Map them into contiguous
   * kernel virtual space, using a pagetable protection of @prot.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2849
2850
   *
   * Return: the address of the area or %NULL on failure
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2851
   */
d0a21265d   David Rientjes   mm: unify module_...
2852
2853
  void *__vmalloc_node_range(unsigned long size, unsigned long align,
  			unsigned long start, unsigned long end, gfp_t gfp_mask,
cb9e3c292   Andrey Ryabinin   mm: vmalloc: pass...
2854
2855
  			pgprot_t prot, unsigned long vm_flags, int node,
  			const void *caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2856
2857
  {
  	struct vm_struct *area;
89219d37a   Catalin Marinas   kmemleak: Add the...
2858
2859
  	void *addr;
  	unsigned long real_size = size;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2860
2861
  	unsigned long real_align = align;
  	unsigned int shift = PAGE_SHIFT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2862

d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2863
2864
2865
2866
2867
  	if (WARN_ON_ONCE(!size))
  		return NULL;
  
  	if ((size >> PAGE_SHIFT) > totalram_pages()) {
  		warn_alloc(gfp_mask, NULL,
f4bdfeaf1   Uladzislau Rezki (Sony)   mm/vmalloc: remov...
2868
2869
  			"vmalloc error: size %lu, exceeds total pages",
  			real_size);
d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2870
  		return NULL;
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2871
  	}
3382bbee0   Christophe Leroy   mm/vmalloc: enabl...
2872
  	if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2873
  		unsigned long size_per_node;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2874

121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
  		/*
  		 * Try huge pages. Only try for PAGE_KERNEL allocations;
  		 * others, like modules, don't yet expect huge pages in
  		 * their allocations because apply_to_page_range does not
  		 * support them.
  		 */
  
  		size_per_node = size;
  		if (node == NUMA_NO_NODE)
  			size_per_node /= num_online_nodes();
3382bbee0   Christophe Leroy   mm/vmalloc: enabl...
2885
  		if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE)
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2886
  			shift = PMD_SHIFT;
3382bbee0   Christophe Leroy   mm/vmalloc: enabl...
2887
2888
2889
2890
2891
  		else
  			shift = arch_vmap_pte_supported_shift(size_per_node);
  
  		align = max(real_align, 1UL << shift);
  		size = ALIGN(real_size, 1UL << shift);
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2892
2893
2894
  	}
  
  again:
7ca3027b7   Daniel Axtens   mm/vmalloc: unbre...
2895
2896
2897
  	area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
  				  VM_UNINITIALIZED | vm_flags, start, end, node,
  				  gfp_mask, caller);
d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2898
2899
  	if (!area) {
  		warn_alloc(gfp_mask, NULL,
f4bdfeaf1   Uladzislau Rezki (Sony)   mm/vmalloc: remov...
2900
2901
  			"vmalloc error: size %lu, vm_struct allocation failed",
  			real_size);
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2902
  		goto fail;
d70bec8cc   Nicholas Piggin   mm/vmalloc: impro...
2903
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2904

121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2905
  	addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
1368edf06   Mel Gorman   mm: vmalloc: chec...
2906
  	if (!addr)
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2907
  		goto fail;
89219d37a   Catalin Marinas   kmemleak: Add the...
2908
2909
  
  	/*
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
2910
2911
  	 * In this function, a newly allocated vm_struct has the VM_UNINITIALIZED
  	 * flag set, which means the vm_struct is not fully initialized.
4341fa454   Joonsoo Kim   mm, vmalloc: remo...
2912
  	 * Now, it is fully initialized, so remove this flag here.
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2913
  	 */
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
2914
  	clear_vm_uninitialized_flag(area);
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2915

7ca3027b7   Daniel Axtens   mm/vmalloc: unbre...
2916
  	size = PAGE_ALIGN(size);
f1675103e   Kefeng Wang   mm: defer kmemlea...
2917
2918
  	if (!(vm_flags & VM_DEFER_KMEMLEAK))
  		kmemleak_vmalloc(area, size, gfp_mask);
89219d37a   Catalin Marinas   kmemleak: Add the...
2919
2920
  
  	return addr;
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2921
2922
  
  fail:
121e6f325   Nicholas Piggin   mm/vmalloc: hugep...
2923
2924
2925
2926
2927
2928
  	if (shift > PAGE_SHIFT) {
  		shift = PAGE_SHIFT;
  		align = real_align;
  		size = real_size;
  		goto again;
  	}
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2929
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2930
  }
d0a21265d   David Rientjes   mm: unify module_...
2931
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2932
2933
2934
2935
   * __vmalloc_node - allocate virtually contiguous memory
   * @size:	    allocation size
   * @align:	    desired alignment
   * @gfp_mask:	    flags for the page level allocator
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2936
2937
   * @node:	    node to use for allocation or NUMA_NO_NODE
   * @caller:	    caller's return address
a7c3e901a   Michal Hocko   mm: introduce kv[...
2938
   *
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2939
2940
   * Allocate enough pages to cover @size from the page level allocator with
   * @gfp_mask flags.  Map them into contiguous kernel virtual space.
a7c3e901a   Michal Hocko   mm: introduce kv[...
2941
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2942
2943
   * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
   * and __GFP_NOFAIL are not supported
a7c3e901a   Michal Hocko   mm: introduce kv[...
2944
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2945
2946
   * Any use of gfp flags outside of GFP_KERNEL should be discussed
   * with the mm people first.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2947
2948
   *
   * Return: pointer to the allocated memory or %NULL on error
d0a21265d   David Rientjes   mm: unify module_...
2949
   */
2b9059489   Christoph Hellwig   mm: remove __vmal...
2950
  void *__vmalloc_node(unsigned long size, unsigned long align,
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2951
  			    gfp_t gfp_mask, int node, const void *caller)
d0a21265d   David Rientjes   mm: unify module_...
2952
2953
  {
  	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2954
  				gfp_mask, PAGE_KERNEL, 0, node, caller);
d0a21265d   David Rientjes   mm: unify module_...
2955
  }
c3f896dcf   Christoph Hellwig   mm: switch the te...
2956
2957
2958
2959
2960
2961
2962
2963
  /*
   * This is only for performance analysis and stress testing of vmalloc.
   * It is required by the vmalloc test module; do not use it for anything
   * else.
   */
  #ifdef CONFIG_TEST_VMALLOC_MODULE
  EXPORT_SYMBOL_GPL(__vmalloc_node);
  #endif
d0a21265d   David Rientjes   mm: unify module_...
2964

88dca4ca5   Christoph Hellwig   mm: remove the pg...
2965
  void *__vmalloc(unsigned long size, gfp_t gfp_mask)
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2966
  {
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2967
  	return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
230169693   Christoph Lameter   vmallocinfo: add ...
2968
  				__builtin_return_address(0));
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2969
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2970
2971
2972
  EXPORT_SYMBOL(__vmalloc);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2973
2974
2975
2976
2977
   * vmalloc - allocate virtually contiguous memory
   * @size:    allocation size
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2978
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2979
2980
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2981
2982
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2983
2984
2985
   */
  void *vmalloc(unsigned long size)
  {
4d39d7285   Christoph Hellwig   mm: remove both i...
2986
2987
  	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
  				__builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2988
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2989
  EXPORT_SYMBOL(vmalloc);
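  
  /*
   * Example (illustrative sketch, not part of this file): a large table that
   * only needs to be virtually contiguous is a typical vmalloc() use case.
   * The names example_table, example_table_init() and example_table_exit()
   * are hypothetical.
   */
  static u32 *example_table;
  
  static int example_table_init(size_t nents)
  {
  	example_table = vmalloc(array_size(nents, sizeof(*example_table)));
  	if (!example_table)
  		return -ENOMEM;
  
  	return 0;
  }
  
  static void example_table_exit(void)
  {
  	vfree(example_table);	/* vfree(NULL) is a no-op */
  	example_table = NULL;
  }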
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2990
  /**
15a64f5a8   Claudio Imbrenda   mm/vmalloc: add v...
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
   * vmalloc_no_huge - allocate virtually contiguous memory using small pages
   * @size:    allocation size
   *
   * Allocate enough non-huge pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   *
   * Return: pointer to the allocated memory or %NULL on error
   */
  void *vmalloc_no_huge(unsigned long size)
  {
  	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
  				    GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
  				    NUMA_NO_NODE, __builtin_return_address(0));
  }
  EXPORT_SYMBOL(vmalloc_no_huge);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3008
3009
3010
3011
3012
3013
3014
3015
3016
   * vzalloc - allocate virtually contiguous memory with zero fill
   * @size:    allocation size
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   * The memory allocated is set to zero.
   *
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3017
3018
   *
   * Return: pointer to the allocated memory or %NULL on error
e1ca7788d   Dave Young   mm: add vzalloc()...
3019
3020
3021
   */
  void *vzalloc(unsigned long size)
  {
4d39d7285   Christoph Hellwig   mm: remove both i...
3022
3023
  	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
  				__builtin_return_address(0));
e1ca7788d   Dave Young   mm: add vzalloc()...
3024
3025
3026
3027
  }
  EXPORT_SYMBOL(vzalloc);
  
  /**
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
3028
3029
   * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
   * @size: allocation size
833423143   Nick Piggin   [PATCH] mm: intro...
3030
   *
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
3031
3032
   * The resulting memory area is zeroed so it can be mapped to userspace
   * without leaking data.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3033
3034
   *
   * Return: pointer to the allocated memory or %NULL on error
833423143   Nick Piggin   [PATCH] mm: intro...
3035
3036
3037
   */
  void *vmalloc_user(unsigned long size)
  {
bc84c5352   Roman Penyaev   mm/vmalloc: pass ...
3038
3039
3040
3041
  	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
  				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
  				    VM_USERMAP, NUMA_NO_NODE,
  				    __builtin_return_address(0));
833423143   Nick Piggin   [PATCH] mm: intro...
3042
3043
3044
3045
  }
  EXPORT_SYMBOL(vmalloc_user);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3046
3047
3048
   * vmalloc_node - allocate memory on a specific node
   * @size:	  allocation size
   * @node:	  numa node
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
3049
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3050
3051
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
3052
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3053
3054
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3055
3056
   *
   * Return: pointer to the allocated memory or %NULL on error
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
3057
3058
3059
   */
  void *vmalloc_node(unsigned long size, int node)
  {
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
3060
3061
  	return __vmalloc_node(size, 1, GFP_KERNEL, node,
  			__builtin_return_address(0));
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
3062
3063
  }
  EXPORT_SYMBOL(vmalloc_node);
e1ca7788d   Dave Young   mm: add vzalloc()...
3064
3065
3066
3067
3068
3069
3070
3071
3072
  /**
   * vzalloc_node - allocate memory on a specific node with zero fill
   * @size:	allocation size
   * @node:	numa node
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   * The memory allocated is set to zero.
   *
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3073
   * Return: pointer to the allocated memory or %NULL on error
e1ca7788d   Dave Young   mm: add vzalloc()...
3074
3075
3076
   */
  void *vzalloc_node(unsigned long size, int node)
  {
4d39d7285   Christoph Hellwig   mm: remove both i...
3077
3078
  	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
  				__builtin_return_address(0));
e1ca7788d   Dave Young   mm: add vzalloc()...
3079
3080
  }
  EXPORT_SYMBOL(vzalloc_node);
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
3081
  #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
698d0831b   Michal Hocko   vmalloc: fix __GF...
3082
  #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
3083
  #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
698d0831b   Michal Hocko   vmalloc: fix __GF...
3084
  #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
3085
  #else
698d0831b   Michal Hocko   vmalloc: fix __GF...
3086
3087
3088
3089
  /*
   * 64b systems should always have either DMA or DMA32 zones. For others
   * GFP_DMA32 should do the right thing and use the normal zone.
   */
68d68ff6e   Zhiyuan Dai   mm/mempool: minor...
3090
  #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
3091
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3092
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3093
3094
   * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
   * @size:	allocation size
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3095
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3096
3097
   * Allocate enough 32bit PA addressable pages to cover @size from the
   * page level allocator and map them into contiguous kernel virtual space.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3098
3099
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3100
3101
3102
   */
  void *vmalloc_32(unsigned long size)
  {
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
3103
3104
  	return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
  			__builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3105
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3106
  EXPORT_SYMBOL(vmalloc_32);
833423143   Nick Piggin   [PATCH] mm: intro...
3107
  /**
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
3108
   * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3109
   * @size:	     allocation size
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
3110
3111
3112
   *
   * The resulting memory area is 32bit addressable and zeroed so it can be
   * mapped to userspace without leaking data.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3113
3114
   *
   * Return: pointer to the allocated memory or %NULL on error
833423143   Nick Piggin   [PATCH] mm: intro...
3115
3116
3117
   */
  void *vmalloc_32_user(unsigned long size)
  {
bc84c5352   Roman Penyaev   mm/vmalloc: pass ...
3118
3119
3120
3121
  	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
  				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
  				    VM_USERMAP, NUMA_NO_NODE,
  				    __builtin_return_address(0));
833423143   Nick Piggin   [PATCH] mm: intro...
3122
3123
  }
  EXPORT_SYMBOL(vmalloc_32_user);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
  /*
   * Small helper routine: copy contents from addr into buf.
   * If a page is not present, fill with zeroes.
   */
  
  static int aligned_vread(char *buf, char *addr, unsigned long count)
  {
  	struct page *p;
  	int copied = 0;
  
  	while (count) {
  		unsigned long offset, length;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
3136
  		offset = offset_in_page(addr);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3137
3138
3139
3140
3141
3142
3143
  		length = PAGE_SIZE - offset;
  		if (length > count)
  			length = count;
  		p = vmalloc_to_page(addr);
  		/*
  		 * To access this _mapped_ area safely, we would need a
  		 * lock. But adding a lock here means adding the
f0953a1bb   Ingo Molnar   mm: fix typos in ...
3144
  		 * locking overhead to vmalloc()/vfree() calls for this _debug_
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3145
3146
3147
3148
  		 * interface, which is rarely used. Instead, we use
  		 * kmap() and accept a small overhead in this access function.
  		 */
  		if (p) {
f7c8ce44e   David Hildenbrand   mm/vmalloc: remov...
3149
  			/* We can expect USER0 is not used -- see vread() */
9b04c5fec   Cong Wang   mm: remove the se...
3150
  			void *map = kmap_atomic(p);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3151
  			memcpy(buf, map + offset, length);
9b04c5fec   Cong Wang   mm: remove the se...
3152
  			kunmap_atomic(map);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
  		} else
  			memset(buf, 0, length);
  
  		addr += length;
  		buf += length;
  		copied += length;
  		count -= length;
  	}
  	return copied;
  }
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3163
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3164
3165
3166
3167
3168
   * vread() - read vmalloc area in a safe way.
   * @buf:     buffer for reading data
   * @addr:    vm address.
   * @count:   number of bytes to be read.
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
   * This function checks that addr is a valid vmalloc'ed area, and
   * copies data from that area to the given buffer. If the given memory range
   * of [addr...addr+count) includes some valid address, data is copied to
   * the proper area of @buf. If there are memory holes, they'll be zero-filled.
   * An IOREMAP area is treated as a memory hole and no copy is done.
   *
   * If [addr...addr+count) doesn't include any intersection with a live
   * vm_struct area, 0 is returned. @buf should be a kernel buffer.
   *
   * Note: In usual ops, vread() is never necessary because the caller
   * should know the vmalloc() area is valid and can use memcpy().
   * This is for routines which have to access the vmalloc area without
bbcd53c96   David Hildenbrand   drivers/char: rem...
3181
   * any information, as /proc/kcore does.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
3182
3183
3184
3185
   *
   * Return: number of bytes for which addr and buf should be increased
   * (same number as @count) or %0 if [addr...addr+count) doesn't
   * include any intersection with valid vmalloc area
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3186
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3187
3188
  long vread(char *buf, char *addr, unsigned long count)
  {
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
3189
3190
  	struct vmap_area *va;
  	struct vm_struct *vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3191
  	char *vaddr, *buf_start = buf;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3192
  	unsigned long buflen = count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3193
3194
3195
3196
3197
  	unsigned long n;
  
  	/* Don't allow overflow */
  	if ((unsigned long) addr + count < count)
  		count = -(unsigned long) addr;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
3198
  	spin_lock(&vmap_area_lock);
f181234a5   Chen Wandun   mm/vmalloc: fix w...
3199
  	va = find_vmap_area_exceed_addr((unsigned long)addr);
f608788cd   Serapheim Dimitropoulos   mm/vmalloc: use r...
3200
3201
  	if (!va)
  		goto finished;
f181234a5   Chen Wandun   mm/vmalloc: fix w...
3202
3203
3204
3205
  
  	/* no intersects with alive vmap_area */
  	if ((unsigned long)addr + count <= va->va_start)
  		goto finished;
f608788cd   Serapheim Dimitropoulos   mm/vmalloc: use r...
3206
  	list_for_each_entry_from(va, &vmap_area_list, list) {
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
3207
3208
  		if (!count)
  			break;
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
3209
  		if (!va->vm)
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
3210
3211
3212
3213
  			continue;
  
  		vm = va->vm;
  		vaddr = (char *) vm->addr;
762216ab4   Wanpeng Li   mm/vmalloc: use w...
3214
  		if (addr >= vaddr + get_vm_area_size(vm))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3215
3216
3217
3218
3219
3220
3221
3222
3223
  			continue;
  		while (addr < vaddr) {
  			if (count == 0)
  				goto finished;
  			*buf = '\0';
  			buf++;
  			addr++;
  			count--;
  		}
762216ab4   Wanpeng Li   mm/vmalloc: use w...
3224
  		n = vaddr + get_vm_area_size(vm) - addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3225
3226
  		if (n > count)
  			n = count;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
3227
  		if (!(vm->flags & VM_IOREMAP))
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3228
3229
3230
3231
3232
3233
  			aligned_vread(buf, addr, n);
  		else /* IOREMAP area is treated as memory hole */
  			memset(buf, 0, n);
  		buf += n;
  		addr += n;
  		count -= n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3234
3235
  	}
  finished:
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
3236
  	spin_unlock(&vmap_area_lock);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3237
3238
3239
3240
3241
3242
3243
3244
  
  	if (buf == buf_start)
  		return 0;
  	/* zero-fill memory holes */
  	if (buf != buf_start + buflen)
  		memset(buf, 0, buflen - (buf - buf_start));
  
  	return buflen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3245
  }
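  
  /*
   * Example (illustrative sketch, not part of this file): copying the start of
   * a vmalloc area into a kernel buffer with vread(), tolerating unmapped
   * holes.  The name example_dump_vmalloc() is hypothetical.
   */
  static long example_dump_vmalloc(void *vmalloc_addr, char *out,
  				 unsigned long len)
  {
  	/* Holes and IOREMAP ranges are zero-filled rather than faulting. */
  	return vread(out, (char *)vmalloc_addr, len);
  }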
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
3246
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3247
3248
3249
3250
   * remap_vmalloc_range_partial - map vmalloc pages to userspace
   * @vma:		vma to cover
   * @uaddr:		target user address to start at
   * @kaddr:		virtual address of vmalloc kernel memory
bdebd6a28   Jann Horn   vmalloc: fix rema...
3251
   * @pgoff:		offset from @kaddr to start at
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3252
   * @size:		size of map area
7682486b3   Randy Dunlap   mm: fix various k...
3253
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3254
   * Returns:	0 for success, -Exxx on failure
833423143   Nick Piggin   [PATCH] mm: intro...
3255
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3256
3257
3258
3259
   * This function checks that @kaddr is a valid vmalloc'ed area,
   * and that it is big enough to cover the range starting at
   * @uaddr in @vma. Will return failure if those criteria aren't
   * met.
833423143   Nick Piggin   [PATCH] mm: intro...
3260
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3261
   * Similar to remap_pfn_range() (see mm/memory.c)
833423143   Nick Piggin   [PATCH] mm: intro...
3262
   */
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3263
  int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
bdebd6a28   Jann Horn   vmalloc: fix rema...
3264
3265
  				void *kaddr, unsigned long pgoff,
  				unsigned long size)
833423143   Nick Piggin   [PATCH] mm: intro...
3266
3267
  {
  	struct vm_struct *area;
bdebd6a28   Jann Horn   vmalloc: fix rema...
3268
3269
3270
3271
3272
  	unsigned long off;
  	unsigned long end_index;
  
  	if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
3273

e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3274
3275
3276
  	size = PAGE_ALIGN(size);
  
  	if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
833423143   Nick Piggin   [PATCH] mm: intro...
3277
  		return -EINVAL;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3278
  	area = find_vm_area(kaddr);
833423143   Nick Piggin   [PATCH] mm: intro...
3279
  	if (!area)
db64fe022   Nick Piggin   mm: rewrite vmap ...
3280
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
3281

fe9041c24   Christoph Hellwig   vmalloc: lift the...
3282
  	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
db64fe022   Nick Piggin   mm: rewrite vmap ...
3283
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
3284

bdebd6a28   Jann Horn   vmalloc: fix rema...
3285
3286
  	if (check_add_overflow(size, off, &end_index) ||
  	    end_index > get_vm_area_size(area))
db64fe022   Nick Piggin   mm: rewrite vmap ...
3287
  		return -EINVAL;
bdebd6a28   Jann Horn   vmalloc: fix rema...
3288
  	kaddr += off;
833423143   Nick Piggin   [PATCH] mm: intro...
3289

833423143   Nick Piggin   [PATCH] mm: intro...
3290
  	do {
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3291
  		struct page *page = vmalloc_to_page(kaddr);
db64fe022   Nick Piggin   mm: rewrite vmap ...
3292
  		int ret;
833423143   Nick Piggin   [PATCH] mm: intro...
3293
3294
3295
3296
3297
  		ret = vm_insert_page(vma, uaddr, page);
  		if (ret)
  			return ret;
  
  		uaddr += PAGE_SIZE;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3298
3299
3300
  		kaddr += PAGE_SIZE;
  		size -= PAGE_SIZE;
  	} while (size > 0);
833423143   Nick Piggin   [PATCH] mm: intro...
3301

314e51b98   Konstantin Khlebnikov   mm: kill vma flag...
3302
  	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
833423143   Nick Piggin   [PATCH] mm: intro...
3303

db64fe022   Nick Piggin   mm: rewrite vmap ...
3304
  	return 0;
833423143   Nick Piggin   [PATCH] mm: intro...
3305
  }
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3306
3307
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3308
3309
3310
3311
   * remap_vmalloc_range - map vmalloc pages to userspace
   * @vma:		vma to cover (map full range of vma)
   * @addr:		vmalloc memory
   * @pgoff:		number of pages into addr before first page to map
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3312
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3313
   * Returns:	0 for success, -Exxx on failure
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3314
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3315
3316
3317
   * This function checks that addr is a valid vmalloc'ed area, and
   * that it is big enough to cover the vma. Will return failure if
   * those criteria aren't met.
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3318
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
3319
   * Similar to remap_pfn_range() (see mm/memory.c)
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3320
3321
3322
3323
3324
   */
  int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
  						unsigned long pgoff)
  {
  	return remap_vmalloc_range_partial(vma, vma->vm_start,
bdebd6a28   Jann Horn   vmalloc: fix rema...
3325
  					   addr, pgoff,
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
3326
3327
  					   vma->vm_end - vma->vm_start);
  }
833423143   Nick Piggin   [PATCH] mm: intro...
3328
  EXPORT_SYMBOL(remap_vmalloc_range);
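  
  /*
   * Example (illustrative sketch, not part of this file): the usual pairing of
   * vmalloc_user() with remap_vmalloc_range() in a driver's ->mmap() handler
   * (a driver would also include <linux/fs.h>).  The names example_buf and
   * example_mmap() are hypothetical; example_buf is assumed to have been
   * allocated with vmalloc_user() so that VM_USERMAP is set on the area.
   */
  static void *example_buf;	/* allocated elsewhere with vmalloc_user() */
  
  static int example_mmap(struct file *file, struct vm_area_struct *vma)
  {
  	/* Map the buffer starting at the offset userspace asked for. */
  	return remap_vmalloc_range(vma, example_buf, vma->vm_pgoff);
  }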
5f4352fbf   Jeremy Fitzhardinge   Allocate and free...
3329
3330
3331
3332
3333
3334
3335
3336
  void free_vm_area(struct vm_struct *area)
  {
  	struct vm_struct *ret;
  	ret = remove_vm_area(area->addr);
  	BUG_ON(ret != area);
  	kfree(area);
  }
  EXPORT_SYMBOL_GPL(free_vm_area);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3337

4f8b02b4e   Tejun Heo   vmalloc: pcpu_get...
3338
  #ifdef CONFIG_SMP
ca23e405e   Tejun Heo   vmalloc: implemen...
3339
3340
  static struct vmap_area *node_to_va(struct rb_node *n)
  {
4583e7731   Geliang Tang   mm/vmalloc.c: use...
3341
  	return rb_entry_safe(n, struct vmap_area, rb_node);
ca23e405e   Tejun Heo   vmalloc: implemen...
3342
3343
3344
  }
  
  /**
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3345
3346
   * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
   * @addr: target address
ca23e405e   Tejun Heo   vmalloc: implemen...
3347
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3348
3349
3350
3351
   * Returns: the vmap_area if it is found. If there is no such area,
   *   the first highest (in reverse order) vmap_area is returned,
   *   i.e. va->va_start < addr && va->va_end < addr, or NULL
   *   if there are no areas before @addr.
ca23e405e   Tejun Heo   vmalloc: implemen...
3352
   */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3353
3354
  static struct vmap_area *
  pvm_find_va_enclose_addr(unsigned long addr)
ca23e405e   Tejun Heo   vmalloc: implemen...
3355
  {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3356
3357
3358
3359
3360
  	struct vmap_area *va, *tmp;
  	struct rb_node *n;
  
  	n = free_vmap_area_root.rb_node;
  	va = NULL;
ca23e405e   Tejun Heo   vmalloc: implemen...
3361
3362
  
  	while (n) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3363
3364
3365
3366
3367
  		tmp = rb_entry(n, struct vmap_area, rb_node);
  		if (tmp->va_start <= addr) {
  			va = tmp;
  			if (tmp->va_end >= addr)
  				break;
ca23e405e   Tejun Heo   vmalloc: implemen...
3368
  			n = n->rb_right;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3369
3370
3371
  		} else {
  			n = n->rb_left;
  		}
ca23e405e   Tejun Heo   vmalloc: implemen...
3372
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3373
  	return va;
ca23e405e   Tejun Heo   vmalloc: implemen...
3374
3375
3376
  }
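
/*
 * A worked example of the lookup above, with made-up addresses. Suppose
 * the free tree holds two areas, [0x1000, 0x2000) and [0x5000, 0x6000):
 *   - addr = 0x1800 lies inside the first area, so that area is returned;
 *   - addr = 0x3000 is enclosed by neither, so the closest preceding
 *     area, [0x1000, 0x2000), is returned;
 *   - addr = 0x0800 lies before every area, so NULL is returned.
 */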
  
/**
 * pvm_determine_end_from_reverse - find the highest aligned address
 * of a free block below VMALLOC_END
 * @va:
 *   in - the VA we start the search from (reverse order);
 *   out - the VA with the highest aligned end address.
 * @align: alignment for required highest address
 *
 * Returns: determined end address within vmap_area
 */
static unsigned long
pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
{
	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	unsigned long addr;

	if (likely(*va)) {
		list_for_each_entry_from_reverse((*va),
				&free_vmap_area_list, list) {
			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
			if ((*va)->va_start < addr)
				return addr;
		}
	}

	return 0;
}
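
/*
 * A short worked example of the alignment arithmetic above, with made-up
 * numbers: with align = 0x10000 and a free block ending at va_end =
 * 0x12345678, the candidate is 0x12345678 & ~(0x10000 - 1) = 0x12340000,
 * clamped to vmalloc_end; it is returned only if the block starts below it.
 */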
  
/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    vm_structs on success, %NULL on failure
 *
 * Percpu allocator wants to use congruent vm areas so that it can
 * maintain the offsets among percpu areas.  This function allocates
 * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
 * be scattered pretty far, distance between two areas easily going up
 * to gigabytes.  To avoid interacting with regular vmallocs, these
 * areas are allocated from the top.
 *
 * Despite its complicated look, this allocator is rather simple. It
 * does everything top-down and scans free blocks from the end looking
 * for a matching base. While scanning, if any of the areas do not fit,
 * the base address is pulled down to fit the area. Scanning is repeated
 * until all the areas fit and then all necessary data structures are
 * inserted and the result is returned.
 */
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
				     const size_t *sizes, int nr_vms,
				     size_t align)
{
	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	struct vmap_area **vas, *va;
	struct vm_struct **vms;
	int area, area2, last_area, term_area;
	unsigned long base, start, size, end, last_end, orig_start, orig_end;
	bool purged = false;
	enum fit_type type;

	/* verify parameters and allocate data structures */
	BUG_ON(offset_in_page(align) || !is_power_of_2(align));
	for (last_area = 0, area = 0; area < nr_vms; area++) {
		start = offsets[area];
		end = start + sizes[area];

		/* is everything aligned properly? */
		BUG_ON(!IS_ALIGNED(offsets[area], align));
		BUG_ON(!IS_ALIGNED(sizes[area], align));

		/* detect the area with the highest address */
		if (start > offsets[last_area])
			last_area = area;

		for (area2 = area + 1; area2 < nr_vms; area2++) {
			unsigned long start2 = offsets[area2];
			unsigned long end2 = start2 + sizes[area2];

			BUG_ON(start2 < end && start < end2);
		}
	}
	last_end = offsets[last_area] + sizes[last_area];

	if (vmalloc_end - vmalloc_start < last_end) {
		WARN_ON(true);
		return NULL;
	}

	vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
	vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
	if (!vas || !vms)
		goto err_free2;

	for (area = 0; area < nr_vms; area++) {
		vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
		if (!vas[area] || !vms[area])
			goto err_free;
	}
retry:
	spin_lock(&free_vmap_area_lock);

	/* start scanning - we scan from the top, begin with the last area */
	area = term_area = last_area;
	start = offsets[area];
	end = start + sizes[area];

	va = pvm_find_va_enclose_addr(vmalloc_end);
	base = pvm_determine_end_from_reverse(&va, align) - end;
  
	while (true) {
		/*
		 * base might have underflowed, add last_end before
		 * comparing.
		 */
		if (base + last_end < vmalloc_start + last_end)
			goto overflow;

		/*
		 * A fitting base has not been found.
		 */
		if (va == NULL)
			goto overflow;

		/*
		 * If required width exceeds current VA block, move
		 * base downwards and then recheck.
		 */
		if (base + end > va->va_end) {
			base = pvm_determine_end_from_reverse(&va, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * If this VA does not fit, move base downwards and recheck.
		 */
		if (base + start < va->va_start) {
			va = node_to_va(rb_prev(&va->rb_node));
			base = pvm_determine_end_from_reverse(&va, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * This area fits, move on to the previous one.  If
		 * the previous one is the terminal one, we're done.
		 */
		area = (area + nr_vms - 1) % nr_vms;
		if (area == term_area)
			break;

		start = offsets[area];
		end = start + sizes[area];
		va = pvm_find_va_enclose_addr(base + end);
	}

	/* we've found a fitting base, insert all va's */
	for (area = 0; area < nr_vms; area++) {
		int ret;

		start = base + offsets[area];
		size = sizes[area];

		va = pvm_find_va_enclose_addr(start);
		if (WARN_ON_ONCE(va == NULL))
			/* It is a BUG(), but trigger recovery instead. */
			goto recovery;

		type = classify_va_fit_type(va, start, size);
		if (WARN_ON_ONCE(type == NOTHING_FIT))
			/* It is a BUG(), but trigger recovery instead. */
			goto recovery;

		ret = adjust_va_to_fit_type(va, start, size, type);
		if (unlikely(ret))
			goto recovery;

		/* Allocated area. */
		va = vas[area];
		va->va_start = start;
		va->va_end = start + size;
	}

	spin_unlock(&free_vmap_area_lock);

	/* populate the kasan shadow space */
	for (area = 0; area < nr_vms; area++) {
		if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
			goto err_free_shadow;

		kasan_unpoison_vmalloc((void *)vas[area]->va_start,
				       sizes[area]);
	}

	/* insert all vm's */
	spin_lock(&vmap_area_lock);
	for (area = 0; area < nr_vms; area++) {
		insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);

		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
				 pcpu_get_vm_areas);
	}
	spin_unlock(&vmap_area_lock);

	kfree(vas);
	return vms;

recovery:
	/*
	 * Remove previously allocated areas. There is no need to remove
	 * these areas from the busy tree, because they are inserted only
	 * on the final step and only when pcpu_get_vm_areas() succeeds.
	 */
	while (area--) {
		orig_start = vas[area]->va_start;
		orig_end = vas[area]->va_end;
		va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root,
				&free_vmap_area_list);
		if (va)
			kasan_release_vmalloc(orig_start, orig_end,
				va->va_start, va->va_end);
		vas[area] = NULL;
	}

overflow:
	spin_unlock(&free_vmap_area_lock);
	if (!purged) {
		purge_vmap_area_lazy();
		purged = true;

		/* Before "retry", check if we recover. */
		for (area = 0; area < nr_vms; area++) {
			if (vas[area])
				continue;

			vas[area] = kmem_cache_zalloc(
				vmap_area_cachep, GFP_KERNEL);
			if (!vas[area])
				goto err_free;
		}

		goto retry;
	}

err_free:
	for (area = 0; area < nr_vms; area++) {
		if (vas[area])
			kmem_cache_free(vmap_area_cachep, vas[area]);

		kfree(vms[area]);
	}
err_free2:
	kfree(vas);
	kfree(vms);
	return NULL;

err_free_shadow:
	spin_lock(&free_vmap_area_lock);
	/*
	 * We release all the vmalloc shadows, even the ones for regions that
	 * hadn't been successfully added. This relies on kasan_release_vmalloc
	 * being able to tolerate this case.
	 */
	for (area = 0; area < nr_vms; area++) {
		orig_start = vas[area]->va_start;
		orig_end = vas[area]->va_end;
		va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root,
				&free_vmap_area_list);
		if (va)
			kasan_release_vmalloc(orig_start, orig_end,
				va->va_start, va->va_end);
		vas[area] = NULL;
		kfree(vms[area]);
	}
	spin_unlock(&free_vmap_area_lock);
	kfree(vas);
	kfree(vms);
	return NULL;
}
  
  /**
   * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
   * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
   * @nr_vms: the number of allocated areas
   *
   * Free vm_structs and the array allocated by pcpu_get_vm_areas().
   */
  void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
  {
  	int i;
  
  	for (i = 0; i < nr_vms; i++)
  		free_vm_area(vms[i]);
  	kfree(vms);
  }
  #endif	/* CONFIG_SMP */
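
/*
 * Illustrative sketch, not part of this file: the contract of
 * pcpu_get_vm_areas()/pcpu_free_vm_areas() with made-up numbers - two
 * congruent areas whose starts keep a fixed distance of 2 * PMD_SIZE.
 * The real in-tree user is the percpu chunk code in mm/percpu-vm.c; the
 * name example_percpu_reserve is hypothetical.
 */
#if 0
static void example_percpu_reserve(void)
{
	const unsigned long offsets[] = { 0, 2 * PMD_SIZE };
	const size_t sizes[] = { PMD_SIZE, PMD_SIZE };
	struct vm_struct **vms;

	vms = pcpu_get_vm_areas(offsets, sizes, 2, PMD_SIZE);
	if (!vms)
		return;

	/* vms[1]->addr - vms[0]->addr == 2 * PMD_SIZE, as requested. */

	pcpu_free_vm_areas(vms, 2);
}
#endif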

#ifdef CONFIG_PRINTK
bool vmalloc_dump_obj(void *object)
{
	struct vm_struct *vm;
	void *objp = (void *)PAGE_ALIGN((unsigned long)object);

	vm = find_vm_area(objp);
	if (!vm)
		return false;
	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
	return true;
}
#endif
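
/*
 * Illustrative sketch, not part of this file: vmalloc_dump_obj() is meant
 * to continue a message already started with pr_*(), which is how
 * mem_dump_obj() in mm/util.c is believed to use it, falling back when it
 * returns false. The function name example_report is hypothetical.
 */
#if 0
static void example_report(void *ptr)
{
	pr_info("object %px:", ptr);
	if (!vmalloc_dump_obj(ptr))
		pr_cont(" not a vmalloc allocation\n");
}
#endif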

#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
	__acquires(&vmap_purge_lock)
	__acquires(&vmap_area_lock)
{
	mutex_lock(&vmap_purge_lock);
	spin_lock(&vmap_area_lock);

	return seq_list_start(&vmap_area_list, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &vmap_area_list, pos);
}

static void s_stop(struct seq_file *m, void *p)
	__releases(&vmap_area_lock)
	__releases(&vmap_purge_lock)
{
	spin_unlock(&vmap_area_lock);
	mutex_unlock(&vmap_purge_lock);
}

static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
	if (IS_ENABLED(CONFIG_NUMA)) {
		unsigned int nr, *counters = m->private;

		if (!counters)
			return;

		if (v->flags & VM_UNINITIALIZED)
			return;
		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
		smp_rmb();

		memset(counters, 0, nr_node_ids * sizeof(unsigned int));

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;

		for_each_node_state(nr, N_HIGH_MEMORY)
			if (counters[nr])
				seq_printf(m, " N%u=%u", nr, counters[nr]);
	}
}

static void show_purge_info(struct seq_file *m)
{
	struct vmap_area *va;

	spin_lock(&purge_vmap_area_lock);
	list_for_each_entry(va, &purge_vmap_area_list, list) {
		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);
	}
	spin_unlock(&purge_vmap_area_lock);
}

static int s_show(struct seq_file *m, void *p)
{
	struct vmap_area *va;
	struct vm_struct *v;

	va = list_entry(p, struct vmap_area, list);

	/*
	 * s_show can race with remove_vm_area(): a NULL vm means the
	 * vmap area is being torn down or backs a vm_map_ram allocation.
	 */
	if (!va->vm) {
		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);

		return 0;
	}

	v = va->vm;

	seq_printf(m, "0x%pK-0x%pK %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller)
		seq_printf(m, " %pS", v->caller);

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%pa", &v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_puts(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_puts(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_puts(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_puts(m, " user");

	if (v->flags & VM_DMA_COHERENT)
		seq_puts(m, " dma-coherent");

	if (is_vmalloc_addr(v->pages))
		seq_puts(m, " vpages");

	show_numa_info(m, v);
	seq_putc(m, '\n');

	/*
	 * As a final step, dump "unpurged" areas.
	 */
	if (list_is_last(&va->list, &vmap_area_list))
		show_purge_info(m);

	return 0;
}

static const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};

static int __init proc_vmalloc_init(void)
{
	if (IS_ENABLED(CONFIG_NUMA))
		proc_create_seq_private("vmallocinfo", 0400, NULL,
				&vmalloc_op,
				nr_node_ids * sizeof(unsigned int), NULL);
	else
		proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
	return 0;
}
module_init(proc_vmalloc_init);

#endif
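
/*
 * Illustrative only, not part of this file: with the interface registered
 * above, one line of `cat /proc/vmallocinfo` produced by s_show() looks
 * roughly like the following (the caller symbol is a hypothetical example;
 * pointers are hashed or zeroed by %pK depending on kptr_restrict; the
 * reported size includes the guard page, so 4 pages show up as 20480 bytes):
 *
 *   0x000000006b2019ab-0x00000000f5cf21d1   20480 example_driver_init+0x24/0x80 pages=4 vmalloc N0=4
 *
 * i.e. start-end, size in bytes, caller, then per-flag tags and NUMA counts.
 */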