Blame view

mm/vmalloc.c 91.1 KB
457c89965   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
3
4
5
6
   *  Copyright (C) 1993  Linus Torvalds
   *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
7
   *  Numa awareness, Christoph Lameter, SGI, June 2005
d758ffe6b   Uladzislau Rezki (Sony)   mm/vmalloc: updat...
8
   *  Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
   */
db64fe022   Nick Piggin   mm: rewrite vmap ...
10
  #include <linux/vmalloc.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
13
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/highmem.h>
c3edc4010   Ingo Molnar   sched/headers: Mo...
14
  #include <linux/sched/signal.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
16
17
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
18
  #include <linux/proc_fs.h>
a10aa5798   Christoph Lameter   vmalloc: show vma...
19
  #include <linux/seq_file.h>
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
20
  #include <linux/set_memory.h>
3ac7fe5a4   Thomas Gleixner   infrastructure to...
21
  #include <linux/debugobjects.h>
230169693   Christoph Lameter   vmallocinfo: add ...
22
  #include <linux/kallsyms.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
23
  #include <linux/list.h>
4da56b99d   Chris Wilson   mm/vmap: Add a no...
24
  #include <linux/notifier.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
25
  #include <linux/rbtree.h>
0f14599c6   Matthew Wilcox (Oracle)   vmalloc: convert ...
26
  #include <linux/xarray.h>
db64fe022   Nick Piggin   mm: rewrite vmap ...
27
  #include <linux/rcupdate.h>
f0aa66179   Tejun Heo   vmalloc: implemen...
28
  #include <linux/pfn.h>
89219d37a   Catalin Marinas   kmemleak: Add the...
29
  #include <linux/kmemleak.h>
60063497a   Arun Sharma   atomic: use <linu...
30
  #include <linux/atomic.h>
3b32123d7   Gideon Israel Dsouza   mm: use macros fr...
31
  #include <linux/compiler.h>
32fcfd407   Al Viro   make vfree() safe...
32
  #include <linux/llist.h>
0f616be12   Toshi Kani   mm: change __get_...
33
  #include <linux/bitops.h>
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
34
  #include <linux/rbtree_augmented.h>
bdebd6a28   Jann Horn   vmalloc: fix rema...
35
  #include <linux/overflow.h>
3b32123d7   Gideon Israel Dsouza   mm: use macros fr...
36

7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
37
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38
  #include <asm/tlbflush.h>
2dca6999e   David Miller   mm, perf_event: M...
39
  #include <asm/shmparam.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40

dd56b0464   Mel Gorman   mm: page_alloc: h...
41
  #include "internal.h"
2a681cfa5   Joerg Roedel   mm: move p?d_allo...
42
  #include "pgalloc-track.h"
dd56b0464   Mel Gorman   mm: page_alloc: h...
43

186525bd6   Ingo Molnar   mm, x86/mm: Untan...
44
45
46
47
48
49
50
  bool is_vmalloc_addr(const void *x)
  {
  	unsigned long addr = (unsigned long)x;
  
  	return addr >= VMALLOC_START && addr < VMALLOC_END;
  }
  EXPORT_SYMBOL(is_vmalloc_addr);
32fcfd407   Al Viro   make vfree() safe...
51
52
53
54
55
56
57
58
59
60
61
  /*
   * Per-CPU state for deferred frees.  free_work() recovers the enclosing
   * structure from @wq and then drains every node queued on @list.
   */
  struct vfree_deferred {
  	struct llist_head list;		/* nodes waiting to be handed to __vunmap() */
  	struct work_struct wq;		/* work item passed to free_work() */
  };
  static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
  
  /* Forward declaration: free_work() below calls it before its definition. */
  static void __vunmap(const void *, int);
  
  /*
   * Drain the deferred-free list that owns @w: detach all queued nodes in
   * one go, then pass each of them to __vunmap() with its second argument
   * set to 1.
   */
  static void free_work(struct work_struct *w)
  {
  	struct vfree_deferred *deferred;
  	struct llist_node *pending, *node, *next;

  	deferred = container_of(w, struct vfree_deferred, wq);
  	pending = llist_del_all(&deferred->list);

  	/* The _safe variant is used because each node is consumed as we go. */
  	llist_for_each_safe(node, next, pending)
  		__vunmap((void *)node, 1);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
67
  /*** Page table manipulation functions ***/
b221385bc   Adrian Bunk   [PATCH] mm/: make...
68

2ba3e6947   Joerg Roedel   mm/vmalloc: track...
69
70
  /*
   * Clear every PTE that covers [addr, end) below @pmd and record in @mask
   * that PTE-level entries were modified.
   */
  static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	pte_t *ptep = pte_offset_kernel(pmd, addr);

  	for (;;) {
  		pte_t old = ptep_get_and_clear(&init_mm, addr, ptep);

  		/* A non-empty entry that was not present is unexpected. */
  		WARN_ON(!pte_none(old) && !pte_present(old));

  		ptep++;
  		addr += PAGE_SIZE;
  		if (addr == end)
  			break;
  	}

  	*mask |= PGTBL_PTE_MODIFIED;
  }
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
81
82
  /*
   * Walk the PMD entries covering [addr, end) below @pud.  A huge PMD
   * mapping is cleared directly; otherwise the walk descends to the PTE
   * level.  @mask collects which page-table levels were modified.
   */
  static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	pmd_t *pmd = pmd_offset(pud, addr);
  	unsigned long next;

  	do {
  		int huge_cleared;

  		next = pmd_addr_end(addr, end);

  		huge_cleared = pmd_clear_huge(pmd);
  		if (huge_cleared || pmd_bad(*pmd))
  			*mask |= PGTBL_PMD_MODIFIED;

  		/*
  		 * Only descend (and offer to reschedule) when the entry was
  		 * not a huge mapping and is neither empty nor bad.
  		 */
  		if (!huge_cleared && !pmd_none_or_clear_bad(pmd)) {
  			vunmap_pte_range(pmd, addr, next, mask);

  			cond_resched();
  		}
  	} while (pmd++, addr = next, addr != end);
  }
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
105
106
  /*
   * Walk the PUD entries covering [addr, end) below @p4d.  A huge PUD
   * mapping is cleared directly; otherwise the walk descends to the PMD
   * level.  @mask collects which page-table levels were modified.
   */
  static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	pud_t *pud = pud_offset(p4d, addr);
  	unsigned long next;

  	do {
  		int huge_cleared;

  		next = pud_addr_end(addr, end);

  		huge_cleared = pud_clear_huge(pud);
  		if (huge_cleared || pud_bad(*pud))
  			*mask |= PGTBL_PUD_MODIFIED;

  		/* Descend only for a regular, populated, sane entry. */
  		if (!huge_cleared && !pud_none_or_clear_bad(pud))
  			vunmap_pmd_range(pud, addr, next, mask);
  	} while (pud++, addr = next, addr != end);
  }
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
127
128
  /*
   * Walk the P4D entries covering [addr, end) below @pgd.  A huge P4D
   * mapping is cleared directly; otherwise the walk descends to the PUD
   * level.  @mask collects which page-table levels were modified.
   */
  static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	p4d_t *p4d = p4d_offset(pgd, addr);
  	unsigned long next;

  	do {
  		int huge_cleared;

  		next = p4d_addr_end(addr, end);

  		huge_cleared = p4d_clear_huge(p4d);
  		if (huge_cleared || p4d_bad(*p4d))
  			*mask |= PGTBL_P4D_MODIFIED;

  		/* Descend only for a regular, populated, sane entry. */
  		if (!huge_cleared && !p4d_none_or_clear_bad(p4d))
  			vunmap_pud_range(p4d, addr, next, mask);
  	} while (p4d++, addr = next, addr != end);
  }
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
149
150
  /**
   * unmap_kernel_range_noflush - unmap kernel VM area
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
151
   * @start: start of the VM area to unmap
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
152
153
154
155
156
157
158
159
160
161
   * @size: size of the VM area to unmap
   *
   * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify
   * should have been allocated using get_vm_area() and its friends.
   *
   * NOTE:
   * This function does NOT do any cache flushing.  The caller is responsible
   * for calling flush_cache_vunmap() on to-be-mapped areas before calling this
   * function and flush_tlb_kernel_range() after.
   */
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
162
  void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163
  {
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
164
  	unsigned long end = start + size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
165
  	unsigned long next;
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
166
  	pgd_t *pgd;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
167
168
  	unsigned long addr = start;
  	pgtbl_mod_mask mask = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
169
170
171
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172
173
  	do {
  		next = pgd_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
174
175
  		if (pgd_bad(*pgd))
  			mask |= PGTBL_PGD_MODIFIED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
176
177
  		if (pgd_none_or_clear_bad(pgd))
  			continue;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
178
  		vunmap_p4d_range(pgd, addr, next, &mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
  	} while (pgd++, addr = next, addr != end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
180
181
182
  
  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
184
185
  }
  
  /*
   * Install PTEs for [addr, end) below @pmd, taking pages from @pages
   * starting at index *@nr.  Returns 0 on success, -ENOMEM if the PTE table
   * cannot be obtained or a page pointer is NULL, and -EBUSY if a PTE is
   * already populated.
   */
  static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	pte_t *ptep;

  	/*
  	 * *nr is a running index into @pages; it is advanced here so that
  	 * the higher-level callers know how far the mapping has progressed.
  	 */
  	ptep = pte_alloc_kernel_track(pmd, addr, mask);
  	if (!ptep)
  		return -ENOMEM;

  	for (;;) {
  		struct page *pg = pages[*nr];

  		if (WARN_ON(!pte_none(*ptep)))
  			return -EBUSY;
  		if (WARN_ON(!pg))
  			return -ENOMEM;

  		set_pte_at(&init_mm, addr, ptep, mk_pte(pg, prot));
  		++*nr;

  		ptep++;
  		addr += PAGE_SIZE;
  		if (addr == end)
  			break;
  	}

  	*mask |= PGTBL_PTE_MODIFIED;
  	return 0;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
210
  /*
   * Map [addr, end) below @pud one PMD entry at a time.  Returns 0 on
   * success and -ENOMEM on any failure.
   */
  static int vmap_pmd_range(pud_t *pud, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	pmd_t *pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
  	unsigned long next;

  	if (!pmd)
  		return -ENOMEM;

  	for (;;) {
  		next = pmd_addr_end(addr, end);

  		/* Every lower-level error is reported upwards as -ENOMEM. */
  		if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
  			return -ENOMEM;

  		if (next == end)
  			break;
  		pmd++;
  		addr = next;
  	}

  	return 0;
  }
c2febafc6   Kirill A. Shutemov   mm: convert gener...
226
  /*
   * Map [addr, end) below @p4d one PUD entry at a time.  Returns 0 on
   * success and -ENOMEM on any failure.
   */
  static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	pud_t *pud = pud_alloc_track(&init_mm, p4d, addr, mask);
  	unsigned long next;

  	if (!pud)
  		return -ENOMEM;

  	for (;;) {
  		next = pud_addr_end(addr, end);

  		/* Every lower-level error is reported upwards as -ENOMEM. */
  		if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
  			return -ENOMEM;

  		if (next == end)
  			break;
  		pud++;
  		addr = next;
  	}

  	return 0;
  }
c2febafc6   Kirill A. Shutemov   mm: convert gener...
242
  /*
   * Map [addr, end) below @pgd one P4D entry at a time.  Returns 0 on
   * success and -ENOMEM on any failure.
   */
  static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	p4d_t *p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
  	unsigned long next;

  	if (!p4d)
  		return -ENOMEM;

  	for (;;) {
  		next = p4d_addr_end(addr, end);

  		/* Every lower-level error is reported upwards as -ENOMEM. */
  		if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
  			return -ENOMEM;

  		if (next == end)
  			break;
  		p4d++;
  		addr = next;
  	}

  	return 0;
  }
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
258
259
260
261
262
263
  /**
   * map_kernel_range_noflush - map kernel VM area with the specified pages
   * @addr: start of the VM area to map
   * @size: size of the VM area to map
   * @prot: page protection flags to use
   * @pages: pages to map
db64fe022   Nick Piggin   mm: rewrite vmap ...
264
   *
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
265
266
267
268
269
270
271
272
273
   * Map PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify should
   * have been allocated using get_vm_area() and its friends.
   *
   * NOTE:
   * This function does NOT do any cache flushing.  The caller is responsible for
   * calling flush_cache_vmap() on to-be-mapped areas before calling this
   * function.
   *
   * RETURNS:
60bb44652   Christoph Hellwig   mm: don't return ...
274
   * 0 on success, -errno on failure.
db64fe022   Nick Piggin   mm: rewrite vmap ...
275
   */
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
276
277
  int map_kernel_range_noflush(unsigned long addr, unsigned long size,
  			     pgprot_t prot, struct page **pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
  {
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
279
  	unsigned long start = addr;
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
280
  	unsigned long end = addr + size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
281
  	unsigned long next;
b521c43f5   Christoph Hellwig   mm: remove vmap_p...
282
  	pgd_t *pgd;
db64fe022   Nick Piggin   mm: rewrite vmap ...
283
284
  	int err = 0;
  	int nr = 0;
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
285
  	pgtbl_mod_mask mask = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
287
288
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
289
290
  	do {
  		next = pgd_addr_end(addr, end);
2ba3e6947   Joerg Roedel   mm/vmalloc: track...
291
292
293
  		if (pgd_bad(*pgd))
  			mask |= PGTBL_PGD_MODIFIED;
  		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
294
  		if (err)
bf88c8c83   Figo.zhang   vmalloc.c: fix do...
295
  			return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
296
  	} while (pgd++, addr = next, addr != end);
db64fe022   Nick Piggin   mm: rewrite vmap ...
297

2ba3e6947   Joerg Roedel   mm/vmalloc: track...
298
299
  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
60bb44652   Christoph Hellwig   mm: don't return ...
300
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
301
  }
ed1f324c5   Christoph Hellwig   mm: remove map_vm...
302
303
  /*
   * Map @pages at [start, start + size) with @prot and then call
   * flush_cache_vmap() on the range.  Returns whatever
   * map_kernel_range_noflush() returned.
   */
  int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
  		struct page **pages)
  {
  	const int err = map_kernel_range_noflush(start, size, prot, pages);

  	/* The flush is issued whether or not the mapping succeeded. */
  	flush_cache_vmap(start, start + size);

  	return err;
  }
81ac3ad90   KAMEZAWA Hiroyuki   kcore: register m...
310
  /*
   * Return non-zero when @x is a vmalloc address or, where a dedicated
   * module region is configured, an address inside that region.
   */
  int is_vmalloc_or_module_addr(const void *x)
  {
  	/*
  	 * ARM, x86-64 and sparc64 place modules in a region of their own
  	 * and fall back on vmalloc() if that fails.  Other architectures
  	 * simply keep modules in the vmalloc space.
  	 */
  #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
  	const unsigned long va = (unsigned long)x;

  	if (va >= MODULES_VADDR && va < MODULES_END)
  		return 1;
  #endif
  	return is_vmalloc_addr(x);
  }
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
324
  /*
add688fbd   malc   Revert "mm/vmallo...
325
   * Walk a vmap address to the struct page it maps.
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
326
   */
add688fbd   malc   Revert "mm/vmallo...
327
  struct page *vmalloc_to_page(const void *vmalloc_addr)
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
328
329
  {
  	unsigned long addr = (unsigned long) vmalloc_addr;
add688fbd   malc   Revert "mm/vmallo...
330
  	struct page *page = NULL;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
331
  	pgd_t *pgd = pgd_offset_k(addr);
c2febafc6   Kirill A. Shutemov   mm: convert gener...
332
333
334
335
  	p4d_t *p4d;
  	pud_t *pud;
  	pmd_t *pmd;
  	pte_t *ptep, pte;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
336

7aa413def   Ingo Molnar   x86, MM: virtual ...
337
338
339
340
  	/*
  	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
  	 * architectures that do not vmalloc module space
  	 */
73bdf0a60   Linus Torvalds   Introduce is_vmal...
341
  	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
59ea74633   Jiri Slaby   MM: virtual addre...
342

c2febafc6   Kirill A. Shutemov   mm: convert gener...
343
344
345
346
347
348
  	if (pgd_none(*pgd))
  		return NULL;
  	p4d = p4d_offset(pgd, addr);
  	if (p4d_none(*p4d))
  		return NULL;
  	pud = pud_offset(p4d, addr);
029c54b09   Ard Biesheuvel   mm/vmalloc.c: hug...
349
350
351
352
353
354
355
356
357
358
359
  
  	/*
  	 * Don't dereference bad PUD or PMD (below) entries. This will also
  	 * identify huge mappings, which we may encounter on architectures
  	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
  	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
  	 * not [unambiguously] associated with a struct page, so there is
  	 * no correct value to return for them.
  	 */
  	WARN_ON_ONCE(pud_bad(*pud));
  	if (pud_none(*pud) || pud_bad(*pud))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
360
361
  		return NULL;
  	pmd = pmd_offset(pud, addr);
029c54b09   Ard Biesheuvel   mm/vmalloc.c: hug...
362
363
  	WARN_ON_ONCE(pmd_bad(*pmd));
  	if (pmd_none(*pmd) || pmd_bad(*pmd))
c2febafc6   Kirill A. Shutemov   mm: convert gener...
364
365
366
367
368
369
370
  		return NULL;
  
  	ptep = pte_offset_map(pmd, addr);
  	pte = *ptep;
  	if (pte_present(pte))
  		page = pte_page(pte);
  	pte_unmap(ptep);
add688fbd   malc   Revert "mm/vmallo...
371
  	return page;
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
372
  }
add688fbd   malc   Revert "mm/vmallo...
373
  EXPORT_SYMBOL(vmalloc_to_page);
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
374
375
  
  /*
   * Map a vmalloc()-space virtual address to the physical page frame number.
   *
   * The page is looked up with vmalloc_to_page() and converted with
   * page_to_pfn(); the lookup result is not checked for NULL here.
   */
  unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
  {
  	struct page *page = vmalloc_to_page(vmalloc_addr);

  	return page_to_pfn(page);
  }
add688fbd   malc   Revert "mm/vmallo...
382
  EXPORT_SYMBOL(vmalloc_to_pfn);
48667e7a4   Christoph Lameter   Move vmalloc_to_p...
383

db64fe022   Nick Piggin   mm: rewrite vmap ...
384
385
  
  /*** Global kva allocator ***/
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
386
  #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
387
  #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
388

db64fe022   Nick Piggin   mm: rewrite vmap ...
389

db64fe022   Nick Piggin   mm: rewrite vmap ...
390
  static DEFINE_SPINLOCK(vmap_area_lock);
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
391
  static DEFINE_SPINLOCK(free_vmap_area_lock);
f1c4069e1   Joonsoo Kim   mm, vmalloc: expo...
392
393
  /* Export for kexec only */
  LIST_HEAD(vmap_area_list);
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
394
  static LLIST_HEAD(vmap_purge_list);
89699605f   Nick Piggin   mm: vmap area cache
395
  static struct rb_root vmap_area_root = RB_ROOT;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
396
  static bool vmap_initialized __read_mostly;
89699605f   Nick Piggin   mm: vmap area cache
397

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
  /*
   * This kmem_cache is used for vmap_area objects. Instead of
   * allocating from slab we reuse an object from this cache to
   * make things faster. Especially in "no edge" splitting of
   * free block.
   */
  static struct kmem_cache *vmap_area_cachep;
  
  /*
   * This linked list is used in pair with free_vmap_area_root.
   * It gives O(1) access to prev/next to perform fast coalescing.
   */
  static LIST_HEAD(free_vmap_area_list);
  
  /*
   * This augmented red-black tree represents the free vmap space.
   * All vmap_area objects in this tree are sorted by va->va_start
   * address. It is used for allocation and merging when a vmap
   * object is released.
   *
   * Each vmap_area node contains a maximum available free block
   * of its sub-tree, right or left. Therefore it is possible to
   * find a lowest match of free area.
   */
  static struct rb_root free_vmap_area_root = RB_ROOT;
82dd23e84   Uladzislau Rezki (Sony)   mm/vmalloc.c: pre...
423
424
425
426
427
428
  /*
   * Preload a CPU with one object for "no edge" split case. The
   * aim is to get rid of allocations from the atomic context, thus
   * to use more permissive allocation masks.
   */
  static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
429
430
431
432
433
434
435
436
437
438
439
440
441
442
  /* Length of the address range described by @va: va_end - va_start. */
  static __always_inline unsigned long
  va_size(struct vmap_area *va)
  {
  	const unsigned long first = va->va_start;
  	const unsigned long last = va->va_end;

  	return last - first;
  }
  
  /*
   * Cached subtree_max_size of the vmap_area that embeds @node, or 0 when
   * @node is NULL.
   */
  static __always_inline unsigned long
  get_subtree_max_size(struct rb_node *node)
  {
  	struct vmap_area *owner = rb_entry_safe(node, struct vmap_area, rb_node);

  	if (!owner)
  		return 0;

  	return owner->subtree_max_size;
  }
89699605f   Nick Piggin   mm: vmap area cache
443

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
444
445
446
447
448
449
450
451
452
453
  /*
   * Gets called when a node is removed and when the tree is rotated.
   */
  static __always_inline unsigned long
  compute_subtree_max_size(struct vmap_area *va)
  {
  	/* Own size versus the cached maxima of the two child subtrees. */
  	const unsigned long own = va_size(va);
  	const unsigned long left = get_subtree_max_size(va->rb_node.rb_left);
  	const unsigned long right = get_subtree_max_size(va->rb_node.rb_right);

  	return max3(own, left, right);
  }
315cc066b   Michel Lespinasse   augmented rbtree:...
454
455
  RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
  	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
456
457
458
459
  
  static void purge_vmap_area_lazy(void);
  static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
  static unsigned long lazy_max_pages(void);
db64fe022   Nick Piggin   mm: rewrite vmap ...
460

97105f0ab   Roman Gushchin   mm: vmalloc: show...
461
462
463
464
465
466
  static atomic_long_t nr_vmalloc_pages;
  
  /* Snapshot of the nr_vmalloc_pages counter. */
  unsigned long vmalloc_nr_pages(void)
  {
  	const long pages = atomic_long_read(&nr_vmalloc_pages);

  	return pages;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
467
  static struct vmap_area *__find_vmap_area(unsigned long addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
468
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
469
470
471
472
473
474
475
476
  	struct rb_node *n = vmap_area_root.rb_node;
  
  	while (n) {
  		struct vmap_area *va;
  
  		va = rb_entry(n, struct vmap_area, rb_node);
  		if (addr < va->va_start)
  			n = n->rb_left;
cef2ac3f6   HATAYAMA Daisuke   vmalloc: make fin...
477
  		else if (addr >= va->va_end)
db64fe022   Nick Piggin   mm: rewrite vmap ...
478
479
480
481
482
483
484
  			n = n->rb_right;
  		else
  			return va;
  	}
  
  	return NULL;
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
485
486
487
  /*
   * This function returns back addresses of parent node
   * and its left or right link for further processing.
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
488
489
490
491
   *
   * Otherwise NULL is returned. In that case all further
   * steps regarding inserting of conflicting overlap range
   * have to be declined and actually considered as a bug.
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
   */
  static __always_inline struct rb_node **
  find_va_links(struct vmap_area *va,
  	struct rb_root *root, struct rb_node *from,
  	struct rb_node **parent)
  {
  	struct vmap_area *tmp_va;
  	struct rb_node **link;
  
  	if (root) {
  		link = &root->rb_node;
  		if (unlikely(!*link)) {
  			*parent = NULL;
  			return link;
  		}
  	} else {
  		link = &from;
  	}
db64fe022   Nick Piggin   mm: rewrite vmap ...
510

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
511
512
513
514
515
516
517
  	/*
  	 * Go to the bottom of the tree. When we hit the last point
  	 * we end up with parent rb_node and correct direction, i name
  	 * it link, where the new va->rb_node will be attached to.
  	 */
  	do {
  		tmp_va = rb_entry(*link, struct vmap_area, rb_node);
db64fe022   Nick Piggin   mm: rewrite vmap ...
518

68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
519
520
521
522
523
524
525
526
527
528
529
  		/*
  		 * During the traversal we also do some sanity check.
  		 * Trigger the BUG() if there are sides(left/right)
  		 * or full overlaps.
  		 */
  		if (va->va_start < tmp_va->va_end &&
  				va->va_end <= tmp_va->va_start)
  			link = &(*link)->rb_left;
  		else if (va->va_end > tmp_va->va_start &&
  				va->va_start >= tmp_va->va_end)
  			link = &(*link)->rb_right;
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
530
531
532
533
534
535
536
  		else {
  			WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx
  ",
  				va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
  
  			return NULL;
  		}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
  	} while (*link);
  
  	*parent = &tmp_va->rb_node;
  	return link;
  }
  
  static __always_inline struct list_head *
  get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
  {
  	struct list_head *list;
  
  	if (unlikely(!parent))
  		/*
  		 * The red-black tree where we try to find VA neighbors
  		 * before merging or inserting is empty, i.e. it means
  		 * there is no free vmap space. Normally it does not
  		 * happen but we handle this case anyway.
  		 */
  		return NULL;
  
  	list = &rb_entry(parent, struct vmap_area, rb_node)->list;
  	return (&parent->rb_right == link ? list->next : list);
  }
  
  /*
   * Insert @va into @root at the position described by @parent / @link and
   * add it to the address-sorted list.  @head is used as the list
   * predecessor only when there is no parent.
   */
  static __always_inline void
  link_va(struct vmap_area *va, struct rb_root *root,
  	struct rb_node *parent, struct rb_node **link, struct list_head *head)
  {
  	/*
  	 * VA is not in the list yet, but its future predecessor can be
  	 * derived from the parent: the parent itself for a right link,
  	 * the parent's predecessor otherwise.
  	 */
  	if (likely(parent)) {
  		struct vmap_area *parent_va;

  		parent_va = rb_entry(parent, struct vmap_area, rb_node);
  		head = (link == &parent->rb_right) ?
  			&parent_va->list : parent_va->list.prev;
  	}

  	/* Insert to the rb-tree */
  	rb_link_node(&va->rb_node, parent, link);
  	if (root != &free_vmap_area_root) {
  		rb_insert_color(&va->rb_node, root);
  	} else {
  		/*
  		 * Plain insertion only: va->subtree_max_size is not set to
  		 * the node's size before rb_insert_augmented(), because the
  		 * tree is populated from the bottom towards the parents
  		 * once the node _is_ in the tree.
  		 *
  		 * The field is therefore zeroed after insertion and left
  		 * for the later propagation step to put in order.
  		 */
  		rb_insert_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  		va->subtree_max_size = 0;
  	}

  	/* Address-sort this list */
  	list_add(&va->list, head);
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
598
599
600
  static __always_inline void
  unlink_va(struct vmap_area *va, struct rb_root *root)
  {
460e42d19   Uladzislau Rezki (Sony)   mm/vmalloc.c: swi...
601
602
  	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
  		return;
db64fe022   Nick Piggin   mm: rewrite vmap ...
603

460e42d19   Uladzislau Rezki (Sony)   mm/vmalloc.c: swi...
604
605
606
607
608
609
610
611
  	if (root == &free_vmap_area_root)
  		rb_erase_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  	else
  		rb_erase(&va->rb_node, root);
  
  	list_del(&va->list);
  	RB_CLEAR_NODE(&va->rb_node);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
612
  }
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
613
614
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
  /*
   * Debug helper: for every entry on free_vmap_area_list, recompute the
   * subtree maximum and report any entry whose cached subtree_max_size
   * differs from it.
   */
  static void
  augment_tree_propagate_check(void)
  {
  	struct vmap_area *va;
  	unsigned long computed_size;

  	list_for_each_entry(va, &free_vmap_area_list, list) {
  		computed_size = compute_subtree_max_size(va);
  		if (computed_size != va->subtree_max_size)
  			pr_emerg("tree is corrupted: %lu, %lu\n",
  				va_size(va), va->subtree_max_size);
  	}
  }
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
  /*
   * This function populates subtree_max_size from bottom to upper
   * levels starting from VA point. The propagation must be done
   * when VA size is modified by changing its va_start/va_end. Or
   * in case of newly inserting of VA to the tree.
   *
   * It means that __augment_tree_propagate_from() must be called:
   * - After VA has been inserted to the tree(free path);
   * - After VA has been shrunk(allocation path);
   * - After VA has been increased(merging path).
   *
   * Please note that, it does not mean that upper parent nodes
   * and their subtree_max_size are recalculated all the time up
   * to the root node.
   *
   *       4--8
   *        /\
   *       /  \
   *      /    \
   *    2--2  8--8
   *
   * For example if we modify the node 4, shrinking it to 2, then
   * no any modification is required. If we shrink the node 2 to 1
   * its subtree_max_size is updated only, and set to 1. If we shrink
   * the node 8 to 6, then its subtree_max_size is set to 6 and parent
   * node becomes 4--6.
   */
  static __always_inline void
  augment_tree_propagate_from(struct vmap_area *va)
  {
15ae144f7   Uladzislau Rezki (Sony)   mm/vmalloc: switc...
659
660
661
662
663
664
  	/*
  	 * Populate the tree from bottom towards the root until
  	 * the calculated maximum available size of checked node
  	 * is equal to its current one.
  	 */
  	free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
665
666
  
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
da27c9ed1   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
667
  	augment_tree_propagate_check();
bb850f4da   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
668
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
669
670
671
672
673
674
675
676
677
678
  }
  
  /*
   * Insert @va into the rb-tree @root and the list @head.
   *
   * The insertion point is looked up with find_va_links(). When it yields
   * no link the function silently does nothing and @va stays unlinked.
   */
  static void
  insert_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct rb_node **link;
  	struct rb_node *parent;

  	link = find_va_links(va, root, NULL, &parent);
  	if (link)
  		link_va(va, root, parent, link, head);
  }
  
  /*
   * Insert @va into the rb-tree @root and the list @head, then propagate
   * subtree_max_size upwards from the new node.
   *
   * When @from is not NULL the search for the insertion point starts at
   * that node instead of at @root. Nothing is inserted or propagated when
   * find_va_links() yields no link.
   */
  static void
  insert_vmap_area_augment(struct vmap_area *va,
  	struct rb_node *from, struct rb_root *root,
  	struct list_head *head)
  {
  	struct rb_node **link;
  	struct rb_node *parent;

  	if (from)
  		link = find_va_links(va, NULL, from, &parent);
  	else
  		link = find_va_links(va, root, NULL, &parent);

  	if (link) {
  		link_va(va, root, parent, link, head);
  		augment_tree_propagate_from(va);
  	}
  }
  
  /*
   * Merge de-allocated chunk of VA memory with previous
   * and next free blocks. If coalesce is not done a new
   * free area is inserted. If VA has been merged, it is
   * freed.
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
706
707
708
709
710
   *
   * Please note, it can return NULL in case of overlap
   * ranges, followed by WARN() report. Despite it is a
   * buggy behaviour, a system can be alive and keep
   * ongoing.
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
711
   */
3c5c3cfb9   Daniel Axtens   kasan: support ba...
712
  static __always_inline struct vmap_area *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
713
714
715
716
717
718
719
720
721
722
723
724
725
726
  merge_or_add_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct vmap_area *sibling;
  	struct list_head *next;
  	struct rb_node **link;
  	struct rb_node *parent;
  	bool merged = false;
  
  	/*
  	 * Find a place in the tree where VA potentially will be
  	 * inserted, unless it is merged with its sibling/siblings.
  	 */
  	link = find_va_links(va, root, NULL, &parent);
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
727
728
  	if (!link)
  		return NULL;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
  
  	/*
  	 * Get next node of VA to check if merging can be done.
  	 */
  	next = get_va_next_sibling(parent, link);
  	if (unlikely(next == NULL))
  		goto insert;
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<------VA------>|<-----Next----->|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next != head) {
  		sibling = list_entry(next, struct vmap_area, list);
  		if (sibling->va_start == va->va_end) {
  			sibling->va_start = va->va_start;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
  		}
  	}
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<-----Prev----->|<------VA------>|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next->prev != head) {
  		sibling = list_entry(next->prev, struct vmap_area, list);
  		if (sibling->va_end == va->va_start) {
5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
767
768
769
770
771
772
773
  			/*
  			 * If both neighbors are coalesced, it is important
  			 * to unlink the "next" node first, followed by merging
  			 * with "previous" one. Otherwise the tree might not be
  			 * fully populated if a sibling's augmented value is
  			 * "normalized" because of rotation operations.
  			 */
54f63d9d8   Uladzislau Rezki (Sony)   mm/vmalloc.c: get...
774
775
  			if (merged)
  				unlink_va(va, root);
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
776

5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
777
  			sibling->va_end = va->va_end;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
778
779
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
3c5c3cfb9   Daniel Axtens   kasan: support ba...
780
781
782
783
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
784
785
786
787
  		}
  	}
  
  insert:
5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
788
  	if (!merged)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
789
  		link_va(va, root, parent, link, head);
3c5c3cfb9   Daniel Axtens   kasan: support ba...
790

5dd786409   Uladzislau Rezki (Sony)   mm/vmalloc: simpl...
791
792
793
794
  	/*
  	 * Last step is to check and update the tree.
  	 */
  	augment_tree_propagate_from(va);
3c5c3cfb9   Daniel Axtens   kasan: support ba...
795
  	return va;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
  }
  
  /*
   * Check whether a request of @size bytes aligned to @align fits into the
   * free area @va without starting below @vstart.
   *
   * The candidate start is the larger of va->va_start and @vstart, rounded
   * up to @align.
   */
  static __always_inline bool
  is_within_this_va(struct vmap_area *va, unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	unsigned long lowest, nva_start_addr, nva_end_addr;

  	lowest = (va->va_start > vstart) ? va->va_start : vstart;
  	nva_start_addr = ALIGN(lowest, align);
  	nva_end_addr = nva_start_addr + size;

  	/* A big size can wrap the end address around. */
  	if (nva_end_addr < nva_start_addr)
  		return false;

  	/* A big alignment can wrap the start address around. */
  	if (nva_start_addr < vstart)
  		return false;

  	return nva_end_addr <= va->va_end;
  }
  
  /*
   * Find the first free block (lowest start address) in the free tree
   * that can satisfy a request described by the passed parameters.
   *
   * @size:   requested size
   * @align:  requested alignment of the start address
   * @vstart: lowest address the allocation may start at
   *
   * The walk is pruned with the per-node subtree_max_size: a sub-tree is
   * only entered when it holds a block of at least size + align - 1 bytes,
   * i.e. one that fits whatever its own alignment is.
   *
   * Returns the matching free vmap_area, or NULL if there is none.
   */
  static __always_inline struct vmap_area *
  find_vmap_lowest_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;
  	struct rb_node *node;
  	unsigned long length;

  	/* Start from the root. */
  	node = free_vmap_area_root.rb_node;

  	/* Adjust the search size for alignment overhead. */
  	length = size + align - 1;

  	while (node) {
  		va = rb_entry(node, struct vmap_area, rb_node);

  		/* Prefer the left (lower address) side whenever it may fit. */
  		if (get_subtree_max_size(node->rb_left) >= length &&
  				vstart < va->va_start) {
  			node = node->rb_left;
  		} else {
  			if (is_within_this_va(va, size, align, vstart))
  				return va;

  			/*
  			 * Does not make sense to go deeper towards the right
  			 * sub-tree if it does not have a free block that is
  			 * equal or bigger to the requested search length.
  			 */
  			if (get_subtree_max_size(node->rb_right) >= length) {
  				node = node->rb_right;
  				continue;
  			}

  			/*
  			 * OK. We roll back and find the first right sub-tree,
  			 * that will satisfy the search criteria. It can happen
  			 * only once due to "vstart" restriction.
  			 */
  			while ((node = rb_parent(node))) {
  				va = rb_entry(node, struct vmap_area, rb_node);
  				if (is_within_this_va(va, size, align, vstart))
  					return va;

  				if (get_subtree_max_size(node->rb_right) >= length &&
  						vstart <= va->va_start) {
  					node = node->rb_right;
  					break;
  				}
  			}
  		}
  	}

  	return NULL;
  }
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  #include <linux/random.h>
  
  /*
   * Debug reference for find_vmap_lowest_match(): scan free_vmap_area_list
   * in list order and return the first entry that fits the request, or
   * NULL when no entry does.
   */
  static struct vmap_area *
  find_vmap_lowest_linear_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;

  	list_for_each_entry(va, &free_vmap_area_list, list) {
  		if (is_within_this_va(va, size, align, vstart))
  			return va;
  	}

  	return NULL;
  }
  
  /*
   * Debug helper: pick a random vstart above VMALLOC_START and verify that
   * the tree based lookup and the linear list lookup agree on the lowest
   * matching free area for @size with an alignment of 1. A mismatch is
   * reported with pr_emerg().
   */
  static void
  find_vmap_lowest_match_check(unsigned long size)
  {
  	struct vmap_area *va_1, *va_2;
  	unsigned long vstart;
  	unsigned int rnd;

  	get_random_bytes(&rnd, sizeof(rnd));
  	vstart = VMALLOC_START + rnd;

  	va_1 = find_vmap_lowest_match(size, 1, vstart);
  	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);

  	if (va_1 != va_2)
  		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
  			va_1, va_2, vstart);
  }
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
  /*
   * How a requested range [start, start + size) sits inside a free
   * vmap_area, as classified by classify_va_fit_type().
   */
  enum fit_type {
  	NOTHING_FIT = 0,	/* request is not fully inside the VA */
  	FL_FIT_TYPE = 1,	/* full fit: both edges match */
  	LE_FIT_TYPE = 2,	/* left edge fit: only the start matches */
  	RE_FIT_TYPE = 3,	/* right edge fit: only the end matches */
  	NE_FIT_TYPE = 4		/* no edge fit: strictly inside the VA */
  };
  
  static __always_inline enum fit_type
  classify_va_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size)
  {
  	enum fit_type type;
  
  	/* Check if it is within VA. */
  	if (nva_start_addr < va->va_start ||
  			nva_start_addr + size > va->va_end)
  		return NOTHING_FIT;
  
  	/* Now classify. */
  	if (va->va_start == nva_start_addr) {
  		if (va->va_end == nva_start_addr + size)
  			type = FL_FIT_TYPE;
  		else
  			type = LE_FIT_TYPE;
  	} else if (va->va_end == nva_start_addr + size) {
  		type = RE_FIT_TYPE;
  	} else {
  		type = NE_FIT_TYPE;
  	}
  
  	return type;
  }
  
  static __always_inline int
  adjust_va_to_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size,
  	enum fit_type type)
  {
2c9292336   Arnd Bergmann   mm/vmalloc.c: avo...
954
  	struct vmap_area *lva = NULL;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
  
  	if (type == FL_FIT_TYPE) {
  		/*
  		 * No need to split VA, it fully fits.
  		 *
  		 * |               |
  		 * V      NVA      V
  		 * |---------------|
  		 */
  		unlink_va(va, &free_vmap_area_root);
  		kmem_cache_free(vmap_area_cachep, va);
  	} else if (type == LE_FIT_TYPE) {
  		/*
  		 * Split left edge of fit VA.
  		 *
  		 * |       |
  		 * V  NVA  V   R
  		 * |-------|-------|
  		 */
  		va->va_start += size;
  	} else if (type == RE_FIT_TYPE) {
  		/*
  		 * Split right edge of fit VA.
  		 *
  		 *         |       |
  		 *     L   V  NVA  V
  		 * |-------|-------|
  		 */
  		va->va_end = nva_start_addr;
  	} else if (type == NE_FIT_TYPE) {
  		/*
  		 * Split no edge of fit VA.
  		 *
  		 *     |       |
  		 *   L V  NVA  V R
  		 * |---|-------|---|
  		 */
82dd23e84   Uladzislau Rezki (Sony)   mm/vmalloc.c: pre...
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
  		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
  		if (unlikely(!lva)) {
  			/*
  			 * For percpu allocator we do not do any pre-allocation
  			 * and leave it as it is. The reason is it most likely
  			 * never ends up with NE_FIT_TYPE splitting. In case of
  			 * percpu allocations offsets and sizes are aligned to
  			 * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE
  			 * are its main fitting cases.
  			 *
  			 * There are a few exceptions though, as an example it is
  			 * a first allocation (early boot up) when we have "one"
  			 * big free space that has to be split.
060650a2a   Uladzislau Rezki (Sony)   mm/vmalloc: add m...
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
  			 *
  			 * Also we can hit this path in case of regular "vmap"
  			 * allocations, if "this" current CPU was not preloaded.
  			 * See the comment in alloc_vmap_area() why. If so, then
  			 * GFP_NOWAIT is used instead to get an extra object for
  			 * split purpose. That is rare and most time does not
  			 * occur.
  			 *
  			 * What happens if an allocation gets failed. Basically,
  			 * an "overflow" path is triggered to purge lazily freed
  			 * areas to free some memory, then, the "retry" path is
  			 * triggered to repeat one more time. See more details
  			 * in alloc_vmap_area() function.
82dd23e84   Uladzislau Rezki (Sony)   mm/vmalloc.c: pre...
1018
1019
1020
1021
1022
  			 */
  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!lva)
  				return -1;
  		}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
  
  		/*
  		 * Build the remainder.
  		 */
  		lva->va_start = va->va_start;
  		lva->va_end = nva_start_addr;
  
  		/*
  		 * Shrink this VA to remaining size.
  		 */
  		va->va_start = nva_start_addr + size;
  	} else {
  		return -1;
  	}
  
  	if (type != FL_FIT_TYPE) {
  		augment_tree_propagate_from(va);
2c9292336   Arnd Bergmann   mm/vmalloc.c: avo...
1040
  		if (lva)	/* type == NE_FIT_TYPE */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
  			insert_vmap_area_augment(lva, &va->rb_node,
  				&free_vmap_area_root, &free_vmap_area_list);
  	}
  
  	return 0;
  }
  
  /*
   * Returns a start address of the newly allocated area, if success.
   * Otherwise a vend is returned that indicates failure.
   */
  static __always_inline unsigned long
  __alloc_vmap_area(unsigned long size, unsigned long align,
cacca6baf   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
1054
  	unsigned long vstart, unsigned long vend)
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
  {
  	unsigned long nva_start_addr;
  	struct vmap_area *va;
  	enum fit_type type;
  	int ret;
  
  	va = find_vmap_lowest_match(size, align, vstart);
  	if (unlikely(!va))
  		return vend;
  
  	if (va->va_start > vstart)
  		nva_start_addr = ALIGN(va->va_start, align);
  	else
  		nva_start_addr = ALIGN(vstart, align);
  
  	/* Check the "vend" restriction. */
  	if (nva_start_addr + size > vend)
  		return vend;
  
  	/* Classify what we have found. */
  	type = classify_va_fit_type(va, nva_start_addr, size);
  	if (WARN_ON_ONCE(type == NOTHING_FIT))
  		return vend;
  
  	/* Update the free vmap_area. */
  	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
  	if (ret)
  		return vend;
a6cf4e0fe   Uladzislau Rezki (Sony)   mm/vmap: add DEBU...
1083
1084
1085
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  	find_vmap_lowest_match_check(size);
  #endif
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1086
1087
  	return nva_start_addr;
  }
4da56b99d   Chris Wilson   mm/vmap: Add a no...
1088

db64fe022   Nick Piggin   mm: rewrite vmap ...
1089
  /*
   * Free a region of KVA allocated by alloc_vmap_area.
   *
   * The area is first removed from the busy tree/list under
   * vmap_area_lock and then handed back to the free tree/list under
   * free_vmap_area_lock; the two locks are never held at the same time.
   * @va may be freed by the merge, so it must not be used afterwards.
   */
  static void free_vmap_area(struct vmap_area *va)
  {
  	/*
  	 * Remove from the busy tree/list.
  	 */
  	spin_lock(&vmap_area_lock);
  	unlink_va(va, &vmap_area_root);
  	spin_unlock(&vmap_area_lock);

  	/*
  	 * Insert/Merge it back to the free tree/list.
  	 */
  	spin_lock(&free_vmap_area_lock);
  	merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
  	spin_unlock(&free_vmap_area_lock);
  }
  
  /*
   * Allocate a region of KVA of the specified size and alignment, within the
   * vstart and vend.
   *
   * @size must be non-zero and page aligned, @align must be a power of two.
   * @gfp_mask is reduced to its GFP_RECLAIM_MASK bits before use.
   *
   * Returns the new busy vmap_area on success, or an ERR_PTR():
   * -EBUSY if vmap is not initialized yet or no address space was found
   *        even after purging lazily freed areas and calling the purge
   *        notifiers,
   * -ENOMEM if the vmap_area object could not be allocated,
   * or the error returned by kasan_populate_vmalloc().
   */
  static struct vmap_area *alloc_vmap_area(unsigned long size,
  				unsigned long align,
  				unsigned long vstart, unsigned long vend,
  				int node, gfp_t gfp_mask)
  {
  	struct vmap_area *va, *pva;
  	unsigned long addr;
  	int purged = 0;
  	int ret;

  	BUG_ON(!size);
  	BUG_ON(offset_in_page(size));
  	BUG_ON(!is_power_of_2(align));

  	if (unlikely(!vmap_initialized))
  		return ERR_PTR(-EBUSY);

  	might_sleep();
  	gfp_mask = gfp_mask & GFP_RECLAIM_MASK;

  	va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
  	if (unlikely(!va))
  		return ERR_PTR(-ENOMEM);

  	/*
  	 * Only scan the relevant parts containing pointers to other objects
  	 * to avoid false negatives.
  	 */
  	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);

  retry:
  	/*
  	 * Preload this CPU with one extra vmap_area object. It is used
  	 * when fit type of free area is NE_FIT_TYPE. Please note, it
  	 * does not guarantee that an allocation occurs on a CPU that
  	 * is preloaded, instead we minimize the case when it is not.
  	 * It can happen because of cpu migration, because there is a
  	 * race until the below spinlock is taken.
  	 *
  	 * The preload is done in non-atomic context, thus it allows us
  	 * to use more permissive allocation masks to be more stable under
  	 * low memory condition and high memory pressure. In rare case,
  	 * if not preloaded, GFP_NOWAIT is used.
  	 *
  	 * Set "pva" to NULL here, because of "retry" path.
  	 */
  	pva = NULL;

  	if (!this_cpu_read(ne_fit_preload_node)) {
  		/*
  		 * Even if it fails we do not really care about that.
  		 * Just proceed as it is. If needed "overflow" path
  		 * will refill the cache we allocate from.
  		 */
  		pva = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
  	}

  	spin_lock(&free_vmap_area_lock);

  	/* Somebody preloaded this CPU in the meantime: drop our object. */
  	if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
  		kmem_cache_free(vmap_area_cachep, pva);

  	/*
  	 * If an allocation fails, the "vend" address is
  	 * returned. Therefore trigger the overflow path.
  	 */
  	addr = __alloc_vmap_area(size, align, vstart, vend);
  	spin_unlock(&free_vmap_area_lock);

  	if (unlikely(addr == vend))
  		goto overflow;

  	va->va_start = addr;
  	va->va_end = addr + size;
  	va->vm = NULL;

  	spin_lock(&vmap_area_lock);
  	insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
  	spin_unlock(&vmap_area_lock);

  	BUG_ON(!IS_ALIGNED(va->va_start, align));
  	BUG_ON(va->va_start < vstart);
  	BUG_ON(va->va_end > vend);

  	ret = kasan_populate_vmalloc(addr, size);
  	if (ret) {
  		free_vmap_area(va);
  		return ERR_PTR(ret);
  	}

  	return va;

  overflow:
  	/* First failure: purge lazily freed areas and try once more. */
  	if (!purged) {
  		purge_vmap_area_lazy();
  		purged = 1;
  		goto retry;
  	}

  	/* Ask the registered notifiers to release some address space. */
  	if (gfpflags_allow_blocking(gfp_mask)) {
  		unsigned long freed = 0;

  		blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
  		if (freed > 0) {
  			purged = 0;
  			goto retry;
  		}
  	}

  	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
  		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
  			size);

  	kmem_cache_free(vmap_area_cachep, va);
  	return ERR_PTR(-EBUSY);
  }
4da56b99d   Chris Wilson   mm/vmap: Add a no...
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
  /*
   * Add @nb to vmap_notify_list, the blocking notifier chain that
   * alloc_vmap_area() calls when it fails to find address space.
   *
   * Returns the result of blocking_notifier_chain_register().
   */
  int register_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_register(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
  
  /*
   * Remove @nb from the vmap_notify_list blocking notifier chain.
   *
   * Returns the result of blocking_notifier_chain_unregister().
   */
  int unregister_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
1233
  /*
   * lazy_max_pages is the maximum amount of virtual address space we gather up
   * before attempting to purge with a TLB flush.
   *
   * There is a tradeoff here: a larger number will cover more kernel page tables
   * and take slightly longer to purge, but it will linearly reduce the number of
   * global TLB flushes that must be performed. It would seem natural to scale
   * this number up linearly with the number of CPUs (because vmapping activity
   * could also scale linearly with the number of CPUs), however it is likely
   * that in practice, workloads might be constrained in other ways that mean
   * vmap activity will not scale linearly with CPUs. Also, I want to be
   * conservative and not introduce a big latency on huge systems, so go with
   * a less aggressive log scale. It will still be an improvement over the old
   * code, and it will be simple to change the scale factor if we find that it
   * becomes a problem on bigger systems.
   *
   * The result is fls(number of online CPUs) steps of 32MB, in pages.
   */
  static unsigned long lazy_max_pages(void)
  {
  	const unsigned long pages_per_step = 32UL * 1024 * 1024 / PAGE_SIZE;
  	unsigned int steps = fls(num_online_cpus());

  	return steps * pages_per_step;
  }
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1257
  static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1258

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1259
1260
1261
1262
1263
  /*
   * Serialize vmap purging.  There is no actual critical section protected
   * by this lock, but we want to avoid concurrent calls for performance
   * reasons and to make the pcpu_get_vm_areas more deterministic.
   */
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1264
  static DEFINE_MUTEX(vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1265

02b709df8   Nick Piggin   mm: purge fragmen...
1266
1267
  /* for per-CPU blocks */
  static void purge_fragmented_blocks_allcpus(void);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1268
  /*
3ee48b6af   Cliff Wickman   mm, x86: Saving v...
1269
1270
1271
1272
1273
   * called before a call to iounmap() if the caller wants vm_area_struct's
   * immediately freed.
   */
  void set_iounmap_nonlazy(void)
  {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1274
  	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
3ee48b6af   Cliff Wickman   mm, x86: Saving v...
1275
1276
1277
  }
  
  /*
db64fe022   Nick Piggin   mm: rewrite vmap ...
1278
   * Purges all lazily-freed vmap areas.
db64fe022   Nick Piggin   mm: rewrite vmap ...
1279
   */
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1280
  static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1281
  {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1282
  	unsigned long resched_threshold;
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1283
  	struct llist_node *valist;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1284
  	struct vmap_area *va;
cbb766766   Vegard Nossum   mm: fix lazy vmap...
1285
  	struct vmap_area *n_va;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1286

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1287
  	lockdep_assert_held(&vmap_purge_lock);
02b709df8   Nick Piggin   mm: purge fragmen...
1288

80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1289
  	valist = llist_del_all(&vmap_purge_list);
68571be99   Uladzislau Rezki (Sony)   mm/vmalloc.c: add...
1290
1291
1292
1293
1294
1295
1296
  	if (unlikely(valist == NULL))
  		return false;
  
  	/*
  	 * TODO: to calculate a flush range without looping.
  	 * The list can be up to lazy_max_pages() elements.
  	 */
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1297
  	llist_for_each_entry(va, valist, purge_list) {
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1298
1299
1300
1301
  		if (va->va_start < start)
  			start = va->va_start;
  		if (va->va_end > end)
  			end = va->va_end;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1302
  	}
db64fe022   Nick Piggin   mm: rewrite vmap ...
1303

0574ecd14   Christoph Hellwig   mm: refactor __pu...
1304
  	flush_tlb_kernel_range(start, end);
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1305
  	resched_threshold = lazy_max_pages() << 1;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1306

e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
1307
  	spin_lock(&free_vmap_area_lock);
763b218dd   Joel Fernandes   mm: add preempt p...
1308
  	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1309
  		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
3c5c3cfb9   Daniel Axtens   kasan: support ba...
1310
1311
  		unsigned long orig_start = va->va_start;
  		unsigned long orig_end = va->va_end;
763b218dd   Joel Fernandes   mm: add preempt p...
1312

dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
1313
1314
1315
1316
1317
  		/*
  		 * Finally insert or merge lazily-freed area. It is
  		 * detached and there is no need to "unlink" it from
  		 * anything.
  		 */
3c5c3cfb9   Daniel Axtens   kasan: support ba...
1318
1319
  		va = merge_or_add_vmap_area(va, &free_vmap_area_root,
  					    &free_vmap_area_list);
9c801f61d   Uladzislau Rezki (Sony)   mm/vmalloc.c: rem...
1320
1321
  		if (!va)
  			continue;
3c5c3cfb9   Daniel Axtens   kasan: support ba...
1322
1323
1324
  		if (is_vmalloc_or_module_addr((void *)orig_start))
  			kasan_release_vmalloc(orig_start, orig_end,
  					      va->va_start, va->va_end);
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
1325

4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1326
  		atomic_long_sub(nr, &vmap_lazy_nr);
68571be99   Uladzislau Rezki (Sony)   mm/vmalloc.c: add...
1327

4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1328
  		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
1329
  			cond_resched_lock(&free_vmap_area_lock);
763b218dd   Joel Fernandes   mm: add preempt p...
1330
  	}
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
1331
  	spin_unlock(&free_vmap_area_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1332
  	return true;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1333
1334
1335
  }
  
  /*
496850e5f   Nick Piggin   mm: vmalloc failu...
1336
1337
1338
1339
1340
   * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
   * is already purging.
   */
  static void try_purge_vmap_area_lazy(void)
  {
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1341
  	if (mutex_trylock(&vmap_purge_lock)) {
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1342
  		__purge_vmap_area_lazy(ULONG_MAX, 0);
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1343
  		mutex_unlock(&vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1344
  	}
496850e5f   Nick Piggin   mm: vmalloc failu...
1345
1346
1347
  }
  
  /*
db64fe022   Nick Piggin   mm: rewrite vmap ...
1348
1349
1350
1351
   * Kick off a purge of the outstanding lazy areas.
   */
  static void purge_vmap_area_lazy(void)
  {
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1352
  	mutex_lock(&vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1353
1354
  	purge_fragmented_blocks_allcpus();
  	__purge_vmap_area_lazy(ULONG_MAX, 0);
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1355
  	mutex_unlock(&vmap_purge_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1356
1357
1358
  }
  
  /*
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1359
1360
1361
   * Free a vmap area. The caller must ensure that the area has been unmapped
   * and that flush_cache_vunmap() has already been called for the correct
   * range.
db64fe022   Nick Piggin   mm: rewrite vmap ...
1362
   */
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1363
  static void free_vmap_area_noflush(struct vmap_area *va)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1364
  {
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1365
  	unsigned long nr_lazy;
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1366

dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
1367
1368
1369
  	spin_lock(&vmap_area_lock);
  	unlink_va(va, &vmap_area_root);
  	spin_unlock(&vmap_area_lock);
4d36e6f80   Uladzislau Rezki (Sony)   mm/vmalloc.c: con...
1370
1371
  	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
  				PAGE_SHIFT, &vmap_lazy_nr);
80c4bd7a5   Chris Wilson   mm/vmalloc: keep ...
1372
1373
1374
1375
1376
  
  	/* After this point, we may free va at any time */
  	llist_add(&va->purge_list, &vmap_purge_list);
  
  	if (unlikely(nr_lazy > lazy_max_pages()))
496850e5f   Nick Piggin   mm: vmalloc failu...
1377
  		try_purge_vmap_area_lazy();
db64fe022   Nick Piggin   mm: rewrite vmap ...
1378
  }
b29acbdcf   Nick Piggin   mm: vmalloc fix l...
1379
1380
1381
1382
1383
1384
  /*
   * Flush the cache for a vmap area, unmap it and hand it over to
   * free_vmap_area_noflush().
   */
  static void free_unmap_vmap_area(struct vmap_area *va)
  {
  	const unsigned long start = va->va_start;
  	const unsigned long end = va->va_end;
  
  	flush_cache_vunmap(start, end);
  	unmap_kernel_range_noflush(start, end - start);
  
  	/* With debug_pagealloc enabled the TLB is flushed right here. */
  	if (debug_pagealloc_enabled_static())
  		flush_tlb_kernel_range(start, end);
  
  	free_vmap_area_noflush(va);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
  /* Look up the vmap area for @addr while holding vmap_area_lock. */
  static struct vmap_area *find_vmap_area(unsigned long addr)
  {
  	struct vmap_area *found;
  
  	spin_lock(&vmap_area_lock);
  	found = __find_vmap_area(addr);
  	spin_unlock(&vmap_area_lock);
  
  	return found;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
  /*** Per cpu kva allocator ***/
  
  /*
   * vmap space is limited especially on 32 bit architectures. Ensure there is
   * room for at least 16 percpu vmap blocks per CPU.
   */
  /*
   * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
   * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
   * instead (we just need a rough idea)
   */
  #if BITS_PER_LONG == 32
  #define VMALLOC_SPACE		(128UL*1024*1024)
  #else
  #define VMALLOC_SPACE		(128UL*1024*1024*1024)
  #endif
  
  /* Rough number of pages in the (guessed) vmalloc space. */
  #define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
  /* Largest request, in pages, served by vmap blocks: 256K with 4K pages on 64-bit */
  #define VMAP_MAX_ALLOC		BITS_PER_LONG
  #define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
  #define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
  #define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
  #define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
  /*
   * Pages per vmap block: 1/16 of the per-CPU share of VMALLOC_PAGES,
   * clamped to [VMAP_BBMAP_BITS_MIN, VMAP_BBMAP_BITS_MAX].
   */
  #define VMAP_BBMAP_BITS		\
  		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
  		VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
  			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
  
  /* Size of one vmap block in bytes. */
  #define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
  
  /* Per-CPU queue of vmap blocks that can still serve allocations. */
  struct vmap_block_queue {
  	spinlock_t lock;	/* taken around additions to / removals from @free */
  	struct list_head free;	/* blocks linked through vmap_block.free_list */
  };
  
  struct vmap_block {
  	spinlock_t lock;
  	struct vmap_area *va;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1438
  	unsigned long free, dirty;
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1439
  	unsigned long dirty_min, dirty_max; /*< dirty range */
de5604231   Nick Piggin   mm: percpu-vmap f...
1440
1441
  	struct list_head free_list;
  	struct rcu_head rcu_head;
02b709df8   Nick Piggin   mm: purge fragmen...
1442
  	struct list_head purge;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1443
1444
1445
1446
1447
1448
  };
  
  /* Per-CPU queue of free and dirty vmap blocks, used for allocation and flushing */
  static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
  
  /*
   * XArray of vmap blocks, indexed by address, so that the free path can find
   * the vmap block quickly. It could go away if the API returned a "cookie"
   * from alloc to be passed back to free, but that is no big deal yet.
   */
  static DEFINE_XARRAY(vmap_blocks);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
  
  /*
   * We should probably have a fallback mechanism to allocate virtual memory
   * out of partially filled vmap blocks. However vmap block sizing should be
   * fairly reasonable according to the vmalloc size, so it shouldn't be a
   * big problem.
   */
  
  /* Convert an address into the index of the vmap block containing it. */
  static unsigned long addr_to_vb_idx(unsigned long addr)
  {
  	/* VMALLOC_START rounded down to a vmap block boundary */
  	unsigned long base = VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
  
  	return (addr - base) / VMAP_BLOCK_SIZE;
  }
cf725ce27   Roman Pen   mm/vmalloc: occup...
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
  /*
   * Return the address @pages_off pages into the block starting at @va_start.
   * The result must lie in the same vmap block as @va_start.
   */
  static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
  {
  	unsigned long vaddr = va_start + (pages_off << PAGE_SHIFT);
  
  	BUG_ON(addr_to_vb_idx(vaddr) != addr_to_vb_idx(va_start));
  	return (void *)vaddr;
  }
  
  /**
   * new_vmap_block - allocate a new vmap_block and occupy 2^order pages in it.
   *                  The occupied page count must stay below VMAP_BBMAP_BITS.
   * @order:    log2 of the number of pages to occupy in the newly allocated block
   * @gfp_mask: flags for the page level allocator
   *
a862f68a8   Mike Rapoport   docs/core-api/mm:...
1483
   * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
cf725ce27   Roman Pen   mm/vmalloc: occup...
1484
1485
   */
  static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
  	struct vmap_area *area;
  	unsigned long occupied;
  	void *vaddr;
  	int nid, rc;
  
  	nid = numa_node_id();
  
  	vb = kmalloc_node(sizeof(*vb), gfp_mask & GFP_RECLAIM_MASK, nid);
  	if (unlikely(!vb))
  		return ERR_PTR(-ENOMEM);
  
  	/* A block is VMAP_BLOCK_SIZE bytes, aligned to its own size. */
  	area = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
  			       VMALLOC_START, VMALLOC_END,
  			       nid, gfp_mask);
  	if (IS_ERR(area)) {
  		kfree(vb);
  		return ERR_CAST(area);
  	}
  
  	/* The pages occupied for the caller start at the block's first page. */
  	vaddr = vmap_block_vaddr(area->va_start, 0);
  
  	spin_lock_init(&vb->lock);
  	vb->va = area;
  
  	/* At least something should be left free */
  	occupied = 1UL << order;
  	BUG_ON(VMAP_BBMAP_BITS <= occupied);
  	vb->free = VMAP_BBMAP_BITS - occupied;
  	vb->dirty = 0;
  	/* Empty dirty range: min starts at the top, max at the bottom. */
  	vb->dirty_min = VMAP_BBMAP_BITS;
  	vb->dirty_max = 0;
  	INIT_LIST_HEAD(&vb->free_list);
  
  	/* Make the block findable by address before it can be allocated from. */
  	rc = xa_insert(&vmap_blocks, addr_to_vb_idx(area->va_start), vb,
  		       gfp_mask);
  	if (rc) {
  		kfree(vb);
  		free_vmap_area(area);
  		return ERR_PTR(rc);
  	}
  
  	/* Queue the block on this CPU's free list. */
  	vbq = &get_cpu_var(vmap_block_queue);
  	spin_lock(&vbq->lock);
  	list_add_tail_rcu(&vb->free_list, &vbq->free);
  	spin_unlock(&vbq->lock);
  	put_cpu_var(vmap_block_queue);
  
  	return vaddr;
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1535
1536
1537
  static void free_vmap_block(struct vmap_block *vb)
  {
  	struct vmap_block *tmp;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1538

0f14599c6   Matthew Wilcox (Oracle)   vmalloc: convert ...
1539
  	tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
db64fe022   Nick Piggin   mm: rewrite vmap ...
1540
  	BUG_ON(tmp != vb);
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1541
  	free_vmap_area_noflush(vb->va);
22a3c7d18   Lai Jiangshan   vmalloc,rcu: Conv...
1542
  	kfree_rcu(vb, rcu_head);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1543
  }
02b709df8   Nick Piggin   mm: purge fragmen...
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
  /*
   * Free every block on @cpu's queue whose pages are all either unused or
   * dirty, unless the block is already fully dirty.
   */
  static void purge_fragmented_blocks(int cpu)
  {
  	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  	struct vmap_block *vb;
  	struct vmap_block *next;
  	LIST_HEAD(purge);
  
  	rcu_read_lock();
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  		/* Unlocked pre-check; repeated below under vb->lock. */
  		if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
  		    vb->dirty == VMAP_BBMAP_BITS)
  			continue;
  
  		spin_lock(&vb->lock);
  		if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
  		    vb->dirty == VMAP_BBMAP_BITS) {
  			spin_unlock(&vb->lock);
  			continue;
  		}
  
  		vb->free = 0; /* prevent further allocs after releasing lock */
  		vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
  		vb->dirty_min = 0;
  		vb->dirty_max = VMAP_BBMAP_BITS;
  
  		spin_lock(&vbq->lock);
  		list_del_rcu(&vb->free_list);
  		spin_unlock(&vbq->lock);
  		spin_unlock(&vb->lock);
  
  		/* Collect the block; it is freed after the RCU walk. */
  		list_add_tail(&vb->purge, &purge);
  	}
  	rcu_read_unlock();
  
  	list_for_each_entry_safe(vb, next, &purge, purge) {
  		list_del(&vb->purge);
  		free_vmap_block(vb);
  	}
  }
02b709df8   Nick Piggin   mm: purge fragmen...
1578
1579
1580
1581
1582
1583
1584
  static void purge_fragmented_blocks_allcpus(void)
  {
  	int cpu;
  
  	for_each_possible_cpu(cpu)
  		purge_fragmented_blocks(cpu);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1585
1586
1587
1588
  /*
   * Allocate 2^get_order(@size) pages of address space from a vmap block on
   * this CPU's queue, creating a new block when no queued block has room.
   * Returns the address, NULL for a zero @size, or whatever new_vmap_block()
   * returns.
   */
  static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
  	unsigned long nr_pages;
  	unsigned int order;
  	void *vaddr = NULL;
  
  	BUG_ON(offset_in_page(size));
  	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  	if (WARN_ON(size == 0)) {
  		/*
  		 * A zero-byte request is never what the caller wants, and
  		 * get_order(0) returns a funny result. Warn and bail out
  		 * early.
  		 */
  		return NULL;
  	}
  	order = get_order(size);
  	nr_pages = 1UL << order;
  
  	rcu_read_lock();
  	vbq = &get_cpu_var(vmap_block_queue);
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  		spin_lock(&vb->lock);
  		if (vb->free >= nr_pages) {
  			/* Unused pages are the tail of the block. */
  			unsigned long pages_off = VMAP_BBMAP_BITS - vb->free;
  
  			vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
  			vb->free -= nr_pages;
  			if (vb->free == 0) {
  				/* Nothing left to hand out: unqueue the block. */
  				spin_lock(&vbq->lock);
  				list_del_rcu(&vb->free_list);
  				spin_unlock(&vbq->lock);
  			}
  			spin_unlock(&vb->lock);
  			break;
  		}
  		spin_unlock(&vb->lock);
  	}
  	put_cpu_var(vmap_block_queue);
  	rcu_read_unlock();
  
  	/* Allocate new block if nothing was found */
  	if (!vaddr)
  		vaddr = new_vmap_block(order, gfp_mask);
  
  	return vaddr;
  }
78a0e8c48   Christoph Hellwig   mm: pass addr as ...
1634
  static void vb_free(unsigned long addr, unsigned long size)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1635
1636
  {
  	unsigned long offset;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1637
1638
  	unsigned int order;
  	struct vmap_block *vb;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
1639
  	BUG_ON(offset_in_page(size));
db64fe022   Nick Piggin   mm: rewrite vmap ...
1640
  	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
b29acbdcf   Nick Piggin   mm: vmalloc fix l...
1641

78a0e8c48   Christoph Hellwig   mm: pass addr as ...
1642
  	flush_cache_vunmap(addr, addr + size);
b29acbdcf   Nick Piggin   mm: vmalloc fix l...
1643

db64fe022   Nick Piggin   mm: rewrite vmap ...
1644
  	order = get_order(size);
78a0e8c48   Christoph Hellwig   mm: pass addr as ...
1645
  	offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
0f14599c6   Matthew Wilcox (Oracle)   vmalloc: convert ...
1646
  	vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));
db64fe022   Nick Piggin   mm: rewrite vmap ...
1647

b521c43f5   Christoph Hellwig   mm: remove vmap_p...
1648
  	unmap_kernel_range_noflush(addr, size);
64141da58   Jeremy Fitzhardinge   vmalloc: eagerly ...
1649

8e57f8acb   Vlastimil Babka   mm, debug_pageall...
1650
  	if (debug_pagealloc_enabled_static())
78a0e8c48   Christoph Hellwig   mm: pass addr as ...
1651
  		flush_tlb_kernel_range(addr, addr + size);
82a2e924f   Chintan Pandya   mm: vmalloc: clea...
1652

db64fe022   Nick Piggin   mm: rewrite vmap ...
1653
  	spin_lock(&vb->lock);
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1654
1655
1656
1657
  
  	/* Expand dirty range */
  	vb->dirty_min = min(vb->dirty_min, offset);
  	vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
d086817dc   MinChan Kim   vmap: remove need...
1658

db64fe022   Nick Piggin   mm: rewrite vmap ...
1659
1660
  	vb->dirty += 1UL << order;
  	if (vb->dirty == VMAP_BBMAP_BITS) {
de5604231   Nick Piggin   mm: percpu-vmap f...
1661
  		BUG_ON(vb->free);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1662
1663
1664
1665
1666
  		spin_unlock(&vb->lock);
  		free_vmap_block(vb);
  	} else
  		spin_unlock(&vb->lock);
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
1667
  static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1668
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
1669
  	int cpu;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1670

9b4633340   Jeremy Fitzhardinge   vmap: cope with v...
1671
1672
  	if (unlikely(!vmap_initialized))
  		return;
5803ed292   Christoph Hellwig   mm: mark all call...
1673
  	might_sleep();
db64fe022   Nick Piggin   mm: rewrite vmap ...
1674
1675
1676
1677
1678
1679
  	for_each_possible_cpu(cpu) {
  		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  		struct vmap_block *vb;
  
  		rcu_read_lock();
  		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
db64fe022   Nick Piggin   mm: rewrite vmap ...
1680
  			spin_lock(&vb->lock);
7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1681
1682
  			if (vb->dirty) {
  				unsigned long va_start = vb->va->va_start;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1683
  				unsigned long s, e;
b136be5e0   Joonsoo Kim   mm, vmalloc: use ...
1684

7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1685
1686
  				s = va_start + (vb->dirty_min << PAGE_SHIFT);
  				e = va_start + (vb->dirty_max << PAGE_SHIFT);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1687

7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1688
1689
  				start = min(s, start);
  				end   = max(e, end);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1690

7d61bfe8f   Roman Pen   mm/vmalloc: get r...
1691
  				flush = 1;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1692
1693
1694
1695
1696
  			}
  			spin_unlock(&vb->lock);
  		}
  		rcu_read_unlock();
  	}
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1697
  	mutex_lock(&vmap_purge_lock);
0574ecd14   Christoph Hellwig   mm: refactor __pu...
1698
1699
1700
  	purge_fragmented_blocks_allcpus();
  	if (!__purge_vmap_area_lazy(start, end) && flush)
  		flush_tlb_kernel_range(start, end);
f9e099776   Christoph Hellwig   mm: turn vmap_pur...
1701
  	mutex_unlock(&vmap_purge_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1702
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
  
  /**
   * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
   *
   * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
   * to amortize TLB flushing overheads. What this means is that any page you
   * have now, may, in a former life, have been mapped into kernel virtual
   * address by the vmap layer and so there might be some CPUs with TLB entries
   * still referencing that page (additional to the regular 1:1 kernel mapping).
   *
   * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
   * be sure that none of the pages we have control over will have any aliases
   * from the vmap layer.
   */
  void vm_unmap_aliases(void)
  {
  	/* Start from an empty range (start > end) with no flush requested. */
  	_vm_unmap_aliases(ULONG_MAX, 0, 0);
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1724
1725
1726
1727
1728
1729
1730
1731
1732
  EXPORT_SYMBOL_GPL(vm_unmap_aliases);
  
  /**
   * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
   * @mem: the pointer returned by vm_map_ram
   * @count: the count passed to that vm_map_ram call (cannot unmap partial)
   */
  void vm_unmap_ram(const void *mem, unsigned int count)
  {
65ee03c4b   Guillermo Julián Moreno   mm: fix overflow ...
1733
  	unsigned long size = (unsigned long)count << PAGE_SHIFT;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1734
  	unsigned long addr = (unsigned long)mem;
9c3acf604   Christoph Hellwig   mm: remove free_u...
1735
  	struct vmap_area *va;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1736

5803ed292   Christoph Hellwig   mm: mark all call...
1737
  	might_sleep();
db64fe022   Nick Piggin   mm: rewrite vmap ...
1738
1739
1740
  	BUG_ON(!addr);
  	BUG_ON(addr < VMALLOC_START);
  	BUG_ON(addr > VMALLOC_END);
a1c0b1a07   Shawn Lin   mm/vmalloc: use P...
1741
  	BUG_ON(!PAGE_ALIGNED(addr));
db64fe022   Nick Piggin   mm: rewrite vmap ...
1742

d98c9e83b   Andrey Ryabinin   kasan: fix crashe...
1743
  	kasan_poison_vmalloc(mem, size);
9c3acf604   Christoph Hellwig   mm: remove free_u...
1744
  	if (likely(count <= VMAP_MAX_ALLOC)) {
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
1745
  		debug_check_no_locks_freed(mem, size);
78a0e8c48   Christoph Hellwig   mm: pass addr as ...
1746
  		vb_free(addr, size);
9c3acf604   Christoph Hellwig   mm: remove free_u...
1747
1748
1749
1750
1751
  		return;
  	}
  
  	va = find_vmap_area(addr);
  	BUG_ON(!va);
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
1752
1753
  	debug_check_no_locks_freed((void *)va->va_start,
  				    (va->va_end - va->va_start));
9c3acf604   Christoph Hellwig   mm: remove free_u...
1754
  	free_unmap_vmap_area(va);
db64fe022   Nick Piggin   mm: rewrite vmap ...
1755
1756
1757
1758
1759
1760
1761
1762
  }
  EXPORT_SYMBOL(vm_unmap_ram);
  
  /**
   * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
   * @pages: an array of pointers to the pages to be mapped
   * @count: number of pages
   * @node: prefer to allocate data structures on this node
e99c97ade   Randy Dunlap   mm: fix kernel-do...
1763
   *
364376383   Gioh Kim   mm/vmalloc.c: enh...
1764
1765
1766
1767
1768
1769
   * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
   * faster than vmap so it's good.  But if you mix long-life and short-life
   * objects with vm_map_ram(), it could consume lots of address space through
   * fragmentation (especially on a 32bit machine).  You could see failures in
   * the end.  Please use this function for short-lived objects.
   *
e99c97ade   Randy Dunlap   mm: fix kernel-do...
1770
   * Returns: a pointer to the address that has been mapped, or %NULL on failure
db64fe022   Nick Piggin   mm: rewrite vmap ...
1771
   */
d4efd79a8   Christoph Hellwig   mm: remove the pr...
1772
  void *vm_map_ram(struct page **pages, unsigned int count, int node)
db64fe022   Nick Piggin   mm: rewrite vmap ...
1773
  {
65ee03c4b   Guillermo Julián Moreno   mm: fix overflow ...
1774
  	unsigned long size = (unsigned long)count << PAGE_SHIFT;
db64fe022   Nick Piggin   mm: rewrite vmap ...
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
  	unsigned long addr;
  	void *mem;
  
  	if (likely(count <= VMAP_MAX_ALLOC)) {
  		mem = vb_alloc(size, GFP_KERNEL);
  		if (IS_ERR(mem))
  			return NULL;
  		addr = (unsigned long)mem;
  	} else {
  		struct vmap_area *va;
  		va = alloc_vmap_area(size, PAGE_SIZE,
  				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
  		if (IS_ERR(va))
  			return NULL;
  
  		addr = va->va_start;
  		mem = (void *)addr;
  	}
d98c9e83b   Andrey Ryabinin   kasan: fix crashe...
1793
1794
  
  	kasan_unpoison_vmalloc(mem, size);
d4efd79a8   Christoph Hellwig   mm: remove the pr...
1795
  	if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) {
db64fe022   Nick Piggin   mm: rewrite vmap ...
1796
1797
1798
1799
1800
1801
  		vm_unmap_ram(mem, count);
  		return NULL;
  	}
  	return mem;
  }
  EXPORT_SYMBOL(vm_map_ram);
4341fa454   Joonsoo Kim   mm, vmalloc: remo...
1802
  static struct vm_struct *vmlist __initdata;
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
1803

f0aa66179   Tejun Heo   vmalloc: implemen...
1804
  /**
be9b7335e   Nicolas Pitre   mm: add vm_area_a...
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
   * vm_area_add_early - add vmap area early during boot
   * @vm: vm_struct to add
   *
   * This function is used to add fixed kernel vm area to vmlist before
   * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags
   * should contain proper values and the other fields should be zero.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
  void __init vm_area_add_early(struct vm_struct *vm)
  {
  	struct vm_struct **link;
  	struct vm_struct *cur;
  
  	BUG_ON(vmap_initialized);
  
  	/* Walk the address-ordered vmlist to find the insertion point. */
  	link = &vmlist;
  	while ((cur = *link) != NULL) {
  		if (cur->addr >= vm->addr) {
  			/* @vm must end before the entry that follows it. */
  			BUG_ON(cur->addr < vm->addr + vm->size);
  			break;
  		}
  		/* Entries before @vm must end at or before its start. */
  		BUG_ON(cur->addr + cur->size > vm->addr);
  		link = &cur->next;
  	}
  
  	vm->next = *link;
  	*link = vm;
  }
  
  /**
f0aa66179   Tejun Heo   vmalloc: implemen...
1831
1832
   * vm_area_register_early - register vmap area early during boot
   * @vm: vm_struct to register
c0c0a2937   Tejun Heo   vmalloc: add @ali...
1833
   * @align: requested alignment
f0aa66179   Tejun Heo   vmalloc: implemen...
1834
1835
1836
1837
1838
1839
1840
1841
   *
   * This function is used to register kernel vm area before
   * vmalloc_init() is called.  @vm->size and @vm->flags should contain
   * proper values on entry and other fields should be zero.  On return,
   * vm->addr contains the allocated address.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
c0c0a2937   Tejun Heo   vmalloc: add @ali...
1842
  void __init vm_area_register_early(struct vm_struct *vm, size_t align)
  {
  	/* Offset from VMALLOC_START of the first address not handed out yet. */
  	static size_t vm_init_off __initdata;
  	unsigned long base;
  
  	base = ALIGN(VMALLOC_START + vm_init_off, align);
  	/* The next registration starts after this area. */
  	vm_init_off = PFN_ALIGN(base + vm->size) - VMALLOC_START;
  
  	vm->addr = (void *)base;
  	vm_area_add_early(vm);
  }
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
  static void vmap_init_free_space(void)
  {
  	unsigned long vmap_start = 1;
  	const unsigned long vmap_end = ULONG_MAX;
  	struct vmap_area *busy, *free;
  
  	/*
  	 *     B     F     B     B     B     F
  	 * -|-----|.....|-----|-----|-----|.....|-
  	 *  |           The KVA space           |
  	 *  |<--------------------------------->|
  	 */
  	list_for_each_entry(busy, &vmap_area_list, list) {
  		if (busy->va_start - vmap_start > 0) {
  			free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!WARN_ON_ONCE(!free)) {
  				free->va_start = vmap_start;
  				free->va_end = busy->va_start;
  
  				insert_vmap_area_augment(free, NULL,
  					&free_vmap_area_root,
  						&free_vmap_area_list);
  			}
  		}
  
  		vmap_start = busy->va_end;
  	}
  
  	if (vmap_end - vmap_start > 0) {
  		free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (!WARN_ON_ONCE(!free)) {
  			free->va_start = vmap_start;
  			free->va_end = vmap_end;
  
  			insert_vmap_area_augment(free, NULL,
  				&free_vmap_area_root,
  					&free_vmap_area_list);
  		}
  	}
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1894
1895
  /*
   * Set up the vmap_area cache and the per-CPU state, import the areas
   * registered early on vmlist and build the initial free space.
   */
  void __init vmalloc_init(void)
  {
  	struct vm_struct *early;
  	struct vmap_area *area;
  	int cpu;
  
  	/*
  	 * Create the cache for vmap_area objects.
  	 */
  	vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
  
  	for_each_possible_cpu(cpu) {
  		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  		struct vfree_deferred *p = &per_cpu(vfree_deferred, cpu);
  
  		spin_lock_init(&vbq->lock);
  		INIT_LIST_HEAD(&vbq->free);
  
  		init_llist_head(&p->list);
  		INIT_WORK(&p->wq, free_work);
  	}
  
  	/* Import existing vmlist entries. */
  	for (early = vmlist; early; early = early->next) {
  		area = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (WARN_ON_ONCE(!area))
  			continue;
  
  		area->va_start = (unsigned long)early->addr;
  		area->va_end = area->va_start + early->size;
  		area->vm = early;
  		insert_vmap_area(area, &vmap_area_root, &vmap_area_list);
  	}
  
  	/*
  	 * Now we can initialize a free vmap space.
  	 */
  	vmap_init_free_space();
  	vmap_initialized = true;
  }
8fc489850   Tejun Heo   vmalloc: add un/m...
1932
  /**
8fc489850   Tejun Heo   vmalloc: add un/m...
1933
1934
1935
1936
1937
1938
1939
   * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
   * @addr: start of the VM area to unmap
   * @size: size of the VM area to unmap
   *
   * Similar to unmap_kernel_range_noflush() but flushes vcache before
   * the unmapping and tlb after.
   */
db64fe022   Nick Piggin   mm: rewrite vmap ...
1940
1941
1942
  void unmap_kernel_range(unsigned long addr, unsigned long size)
  {
  	const unsigned long end = addr + size;
  
  	/* Cache flush before the unmap, TLB flush after it. */
  	flush_cache_vunmap(addr, end);
  	unmap_kernel_range_noflush(addr, size);
  	flush_tlb_kernel_range(addr, end);
  }
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
1948
1949
  static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
  	struct vmap_area *va, unsigned long flags, const void *caller)
cf88c7900   Tejun Heo   vmalloc: separate...
1950
  {
cf88c7900   Tejun Heo   vmalloc: separate...
1951
1952
1953
1954
  	vm->flags = flags;
  	vm->addr = (void *)va->va_start;
  	vm->size = va->va_end - va->va_start;
  	vm->caller = caller;
db1aecafe   Minchan Kim   mm/vmalloc.c: cha...
1955
  	va->vm = vm;
e36176be1   Uladzislau Rezki (Sony)   mm/vmalloc: rewor...
1956
1957
1958
1959
1960
1961
1962
  }
  
  /*
   * Initialise @vm from @va and attach it, taking vmap_area_lock around the
   * update.  The actual field assignments live in setup_vmalloc_vm_locked().
   */
  static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
  			      unsigned long flags, const void *caller)
  {
  	spin_lock(&vmap_area_lock);
  	setup_vmalloc_vm_locked(vm, va, flags, caller);
  	spin_unlock(&vmap_area_lock);
  }
cf88c7900   Tejun Heo   vmalloc: separate...
1965

20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
1966
  static void clear_vm_uninitialized_flag(struct vm_struct *vm)
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
1967
  {
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
1968
  	/*
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
1969
  	 * Before removing VM_UNINITIALIZED,
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
1970
1971
1972
1973
  	 * we should make sure that vm has proper values.
  	 * Pair with smp_rmb() in show_numa_info().
  	 */
  	smp_wmb();
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
1974
  	vm->flags &= ~VM_UNINITIALIZED;
cf88c7900   Tejun Heo   vmalloc: separate...
1975
  }
db64fe022   Nick Piggin   mm: rewrite vmap ...
1976
  /*
   * Allocate a vm_struct and reserve a matching range of kernel virtual
   * address space inside [@start, @end).
   *
   * @size is rounded up to a page multiple.  Unless VM_NO_GUARD is set in
   * @flags, one extra page is added to the reservation.  For VM_IOREMAP
   * areas @align is replaced by a power of two derived from the size, with
   * the order clamped between PAGE_SHIFT and IOREMAP_MAX_ORDER.
   *
   * Must not be called from interrupt context (enforced with BUG_ON).
   * Returns the new descriptor, or NULL when the aligned size is zero or
   * one of the allocations fails.
   */
  static struct vm_struct *__get_vm_area_node(unsigned long size,
  		unsigned long align, unsigned long flags, unsigned long start,
  		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
  {
  	const unsigned long requested_size = size;
  	struct vm_struct *area;
  	struct vmap_area *va;

  	BUG_ON(in_interrupt());

  	size = PAGE_ALIGN(size);
  	if (unlikely(size == 0))
  		return NULL;

  	if (flags & VM_IOREMAP) {
  		int order = clamp_t(int, get_count_order_long(size),
  				    PAGE_SHIFT, IOREMAP_MAX_ORDER);

  		align = 1ul << order;
  	}

  	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
  	if (unlikely(area == NULL))
  		return NULL;

  	/* Add one guard page unless the caller passed VM_NO_GUARD. */
  	if ((flags & VM_NO_GUARD) == 0)
  		size += PAGE_SIZE;

  	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
  	if (IS_ERR(va)) {
  		kfree(area);
  		return NULL;
  	}

  	/* Unpoison the originally requested size, not the padded one. */
  	kasan_unpoison_vmalloc((void *)va->va_start, requested_size);

  	setup_vmalloc_vm(area, va, flags, caller);

  	return area;
  }
c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
2010
2011
  struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  				       unsigned long start, unsigned long end,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
2012
  				       const void *caller)
c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
2013
  {
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2014
2015
  	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
  				  GFP_KERNEL, caller);
c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
2016
  }
1ac80b081   Jan Kiszka   mm: vmalloc: Expo...
2017
  EXPORT_SYMBOL_GPL(__get_vm_area_caller);
c29686129   Benjamin Herrenschmidt   vmalloc: add __ge...
2018

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2019
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2020
2021
2022
   * get_vm_area - reserve a contiguous kernel virtual area
   * @size:	 size of the area
   * @flags:	 %VM_IOREMAP for I/O mappings or VM_ALLOC
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2023
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2024
2025
2026
   * Search an area of @size in the kernel virtual mapping area,
   * and reserved it for out purposes.  Returns the area descriptor
   * on success or %NULL on failure.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2027
2028
   *
   * Return: the area descriptor on success or %NULL on failure.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2029
2030
2031
   */
  struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
  {
2dca6999e   David Miller   mm, perf_event: M...
2032
  	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2033
2034
  				  NUMA_NO_NODE, GFP_KERNEL,
  				  __builtin_return_address(0));
230169693   Christoph Lameter   vmallocinfo: add ...
2035
2036
2037
  }
  
  struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
5e6cafc83   Marek Szyprowski   mm: vmalloc: use ...
2038
  				const void *caller)
230169693   Christoph Lameter   vmallocinfo: add ...
2039
  {
2dca6999e   David Miller   mm, perf_event: M...
2040
  	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
00ef2d2f8   David Rientjes   mm: use NUMA_NO_NODE
2041
  				  NUMA_NO_NODE, GFP_KERNEL, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2042
  }
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2043
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2044
2045
   * find_vm_area - find a continuous kernel virtual area
   * @addr:	  base address
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2046
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2047
2048
2049
   * Search for the kernel VM area starting at @addr, and return it.
   * It is up to the caller to do all required locking to keep the returned
   * pointer valid.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2050
   *
74640617e   Hui Su   mm/vmalloc.c: fix...
2051
   * Return: the area descriptor on success or %NULL on failure.
e9da6e990   Marek Szyprowski   ARM: dma-mapping:...
2052
2053
   */
  struct vm_struct *find_vm_area(const void *addr)
833423143   Nick Piggin   [PATCH] mm: intro...
2054
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
2055
  	struct vmap_area *va;
833423143   Nick Piggin   [PATCH] mm: intro...
2056

db64fe022   Nick Piggin   mm: rewrite vmap ...
2057
  	va = find_vmap_area((unsigned long)addr);
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2058
2059
  	if (!va)
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2060

688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2061
  	return va->vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2062
  }
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2063
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2064
2065
   * remove_vm_area - find and remove a continuous kernel virtual area
   * @addr:	    base address
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2066
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2067
2068
2069
   * Search for the kernel VM area starting at @addr, and remove it.
   * This function returns the found VM area, but using it is NOT safe
   * on SMP machines, except for its size or flags.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2070
   *
74640617e   Hui Su   mm/vmalloc.c: fix...
2071
   * Return: the area descriptor on success or %NULL on failure.
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2072
   */
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2073
  struct vm_struct *remove_vm_area(const void *addr)
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2074
  {
db64fe022   Nick Piggin   mm: rewrite vmap ...
2075
  	struct vmap_area *va;
5803ed292   Christoph Hellwig   mm: mark all call...
2076
  	might_sleep();
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
2077
2078
  	spin_lock(&vmap_area_lock);
  	va = __find_vmap_area((unsigned long)addr);
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2079
  	if (va && va->vm) {
db1aecafe   Minchan Kim   mm/vmalloc.c: cha...
2080
  		struct vm_struct *vm = va->vm;
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2081

c69480ade   Joonsoo Kim   mm, vmalloc: prot...
2082
  		va->vm = NULL;
c69480ade   Joonsoo Kim   mm, vmalloc: prot...
2083
  		spin_unlock(&vmap_area_lock);
a5af5aa8b   Andrey Ryabinin   kasan, module, vm...
2084
  		kasan_free_shadow(vm);
dd32c2799   KAMEZAWA Hiroyuki   vmalloc: unmap vm...
2085
  		free_unmap_vmap_area(va);
dd32c2799   KAMEZAWA Hiroyuki   vmalloc: unmap vm...
2086

db64fe022   Nick Piggin   mm: rewrite vmap ...
2087
2088
  		return vm;
  	}
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
2089
2090
  
  	spin_unlock(&vmap_area_lock);
db64fe022   Nick Piggin   mm: rewrite vmap ...
2091
  	return NULL;
7856dfeb2   Andi Kleen   [PATCH] x86_64: F...
2092
  }
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
  /*
   * Invoke @set_direct_map on every page of @area for which page_address()
   * yields a non-NULL address; pages without one are skipped.  The return
   * value of the callback is ignored.
   */
  static inline void set_area_direct_map(const struct vm_struct *area,
  				       int (*set_direct_map)(struct page *page))
  {
  	int i;

  	for (i = 0; i < area->nr_pages; i++) {
  		struct page *page = area->pages[i];

  		if (!page_address(page))
  			continue;
  		set_direct_map(page);
  	}
  }
  
  /* Handle removing and resetting vm mappings related to the vm_struct. */
  static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
  {
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2106
2107
  	unsigned long start = ULONG_MAX, end = 0;
  	int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2108
  	int flush_dmap = 0;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2109
  	int i;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
  	remove_vm_area(area->addr);
  
  	/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
  	if (!flush_reset)
  		return;
  
  	/*
  	 * If not deallocating pages, just do the flush of the VM area and
  	 * return.
  	 */
  	if (!deallocate_pages) {
  		vm_unmap_aliases();
  		return;
  	}
  
  	/*
  	 * If execution gets here, flush the vm mapping and reset the direct
  	 * map. Find the start and end range of the direct mappings to make sure
  	 * the vm_unmap_aliases() flush includes the direct map.
  	 */
  	for (i = 0; i < area->nr_pages; i++) {
8e41f8726   Rick Edgecombe   mm/vmalloc: Fix c...
2131
2132
  		unsigned long addr = (unsigned long)page_address(area->pages[i]);
  		if (addr) {
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2133
  			start = min(addr, start);
8e41f8726   Rick Edgecombe   mm/vmalloc: Fix c...
2134
  			end = max(addr + PAGE_SIZE, end);
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2135
  			flush_dmap = 1;
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2136
2137
2138
2139
2140
2141
2142
2143
2144
  		}
  	}
  
  	/*
  	 * Set direct map to something invalid so that it won't be cached if
  	 * there are any accesses after the TLB flush, then flush the TLB and
  	 * reset the direct map permissions to the default.
  	 */
  	set_area_direct_map(area, set_direct_map_invalid_noflush);
31e67340c   Rick Edgecombe   mm/vmalloc: Avoid...
2145
  	_vm_unmap_aliases(start, end, flush_dmap);
868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2146
2147
  	set_area_direct_map(area, set_direct_map_default_noflush);
  }
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2148
  static void __vunmap(const void *addr, int deallocate_pages)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2149
2150
2151
2152
2153
  {
  	struct vm_struct *area;
  
  	if (!addr)
  		return;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2154
2155
  	if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)
  ",
ab15d9b4c   Dan Carpenter   mm/vmalloc.c: unb...
2156
  			addr))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2157
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2158

6ade20327   Liviu Dudau   mm/vmalloc.c: don...
2159
  	area = find_vm_area(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2160
  	if (unlikely(!area)) {
4c8573e25   Arjan van de Ven   Use WARN() in mm/...
2161
2162
  		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2163
  				addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2164
2165
  		return;
  	}
05e3ff950   Chintan Pandya   mm: vmalloc: pass...
2166
2167
  	debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
  	debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
9a11b49a8   Ingo Molnar   [PATCH] lockdep: ...
2168

23713b480   Vincenzo Frascino   mm/vmalloc.c: fix...
2169
  	kasan_poison_vmalloc(area->addr, get_vm_area_size(area));
3c5c3cfb9   Daniel Axtens   kasan: support ba...
2170

868b104d7   Rick Edgecombe   mm/vmalloc: Add f...
2171
  	vm_remove_mappings(area, deallocate_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2172
2173
2174
2175
  	if (deallocate_pages) {
  		int i;
  
  		for (i = 0; i < area->nr_pages; i++) {
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2176
2177
2178
  			struct page *page = area->pages[i];
  
  			BUG_ON(!page);
4949148ad   Vladimir Davydov   mm: charge/unchar...
2179
  			__free_pages(page, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2180
  		}
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2181
  		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2182

244d63ee3   David Rientjes   mm, vmalloc: remo...
2183
  		kvfree(area->pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2184
2185
2186
2187
2188
  	}
  
  	kfree(area);
  	return;
  }
bf22e37a6   Andrey Ryabinin   mm: add vfree_ato...
2189
2190
2191
2192
2193
2194
2195
  
  /*
   * Queue @addr on a per-cpu deferred-free list instead of freeing it here.
   * The start of the memory being freed is reused as the llist_node.
   */
  static inline void __vfree_deferred(const void *addr)
  {
  	/*
  	 * Use raw_cpu_ptr() because this can be called from preemptible
  	 * context. Preemption is absolutely fine here, because the llist_add()
  	 * implementation is lockless, so it works even if we are adding to
  	 * another cpu's list. schedule_work() should be fine with this too.
  	 */
  	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
  	struct llist_node *node = (struct llist_node *)addr;

  	/*
  	 * Kick the worker only when llist_add() returns non-zero
  	 * (presumably: the list was empty before -- confirm in llist.h).
  	 */
  	if (!llist_add(node, &p->list))
  		return;

  	schedule_work(&p->wq);
  }
  
  /**
   * vfree_atomic - release memory allocated by vmalloc()
   * @addr:	  memory base address
   *
   * This one is just like vfree() but can be called in any atomic context
   * except NMIs.  The free is always handed to __vfree_deferred() rather
   * than being performed inline.  A NULL @addr is ignored.
   */
  void vfree_atomic(const void *addr)
  {
  	BUG_ON(in_nmi());

  	kmemleak_free(addr);

  	if (addr)
  		__vfree_deferred(addr);
  }
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2221
2222
2223
2224
2225
2226
2227
  /*
   * Free @addr right away when not in interrupt context; otherwise hand it
   * to __vfree_deferred().
   */
  static void __vfree(const void *addr)
  {
  	if (likely(!in_interrupt())) {
  		__vunmap(addr, 1);
  		return;
  	}

  	__vfree_deferred(addr);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2228
  /**
   * vfree - Release memory allocated by vmalloc()
   * @addr:  Memory base address
   *
   * Free the virtually continuous memory area starting at @addr, as obtained
   * from one of the vmalloc() family of APIs.  This will usually also free the
   * physical memory underlying the virtual allocation, but that memory is
   * reference counted, so it will not be freed until the last user goes away.
   *
   * If @addr is NULL, no operation is performed.
   *
   * Context:
   * May sleep if called *not* from interrupt context.
   * Must not be called in NMI context (strictly speaking, it could be
   * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
   * conventions for vfree() arch-dependent would be a really bad idea).
   */
  void vfree(const void *addr)
  {
  	BUG_ON(in_nmi());

  	kmemleak_free(addr);

  	might_sleep_if(!in_interrupt());

  	if (addr)
  		__vfree(addr);
  }
  EXPORT_SYMBOL(vfree);
  
  /**
   * vunmap - release virtual mapping obtained by vmap()
   * @addr:   memory base address
   *
   * Free the virtually contiguous memory area starting at @addr,
   * which was created from the page array passed to vmap().
   * A NULL @addr is ignored.
   *
   * Must not be called in interrupt context.  May sleep.
   */
  void vunmap(const void *addr)
  {
  	BUG_ON(in_interrupt());
  	might_sleep();

  	if (!addr)
  		return;

  	__vunmap(addr, 0);
  }
  EXPORT_SYMBOL(vunmap);
  
  /**
   * vmap - map an array of pages into virtually contiguous space
   * @pages: array of page pointers
   * @count: number of pages to map
   * @flags: vm_area->flags
   * @prot: page protection for the mapping
   *
   * Maps @count pages from @pages into contiguous kernel virtual space.
   * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
   * (which must be kmalloc or vmalloc memory) and one reference per pages in it
   * are transferred from the caller to vmap(), and will be freed / dropped when
   * vfree() is called on the return value.
   *
   * The mapping is always created non-executable (pgprot_nx() is applied to
   * @prot).  Requests for more pages than totalram_pages() are rejected.
   * May sleep.
   *
   * Return: the address of the area or %NULL on failure
   */
  void *vmap(struct page **pages, unsigned int count,
  	   unsigned long flags, pgprot_t prot)
  {
  	struct vm_struct *area;
  	unsigned long size;		/* In bytes */
  	int err;

  	might_sleep();

  	if (count > totalram_pages())
  		return NULL;

  	/* Widen before shifting so the byte count cannot wrap in 32 bits. */
  	size = (unsigned long)count << PAGE_SHIFT;
  	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
  	if (!area)
  		return NULL;

  	err = map_kernel_range((unsigned long)area->addr, size,
  			       pgprot_nx(prot), pages);
  	if (err < 0) {
  		vunmap(area->addr);
  		return NULL;
  	}

  	/* Record the page array only when ownership was handed over. */
  	if (flags & VM_MAP_PUT_PAGES) {
  		area->pages = pages;
  		area->nr_pages = count;
  	}

  	return area->addr;
  }
  EXPORT_SYMBOL(vmap);
3e9a9e256   Christoph Hellwig   mm: add a vmap_pf...
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
  #ifdef CONFIG_VMAP_PFN
  /* Cursor state passed by vmap_pfn() to vmap_pfn_apply() for each PTE. */
  struct vmap_pfn_data {
  	unsigned long	*pfns;	/* caller-supplied array of PFNs to map */
  	pgprot_t	prot;	/* protection used for every PTE built */
  	unsigned int	idx;	/* index into @pfns of the next PFN to map */
  };
  
  /*
   * Per-PTE callback used by vmap_pfn(): install a special PTE for the next
   * PFN in @private (a struct vmap_pfn_data) and advance the cursor.
   * A PFN for which pfn_valid() is true is refused with a one-time warning
   * and -EINVAL; the cursor is not advanced in that case.
   */
  static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
  {
  	struct vmap_pfn_data *data = private;
  	unsigned long pfn = data->pfns[data->idx];

  	if (WARN_ON_ONCE(pfn_valid(pfn)))
  		return -EINVAL;

  	*pte = pte_mkspecial(pfn_pte(pfn, data->prot));
  	data->idx++;

  	return 0;
  }
  
  /**
   * vmap_pfn - map an array of PFNs into virtually contiguous space
   * @pfns: array of PFNs
   * @count: number of pages to map
   * @prot: page protection for the mapping
   *
   * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns
   * the start address of the mapping.  The mapping is created non-executable
   * (pgprot_nx() is applied to @prot) in a %VM_IOREMAP area.
   *
   * Return: the start address of the mapping or %NULL on failure
   */
  void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
  {
  	struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
  	/* Widen before multiplying so the byte count is computed once. */
  	unsigned long size = (unsigned long)count * PAGE_SIZE;
  	struct vm_struct *area;

  	area = get_vm_area_caller(size, VM_IOREMAP,
  			__builtin_return_address(0));
  	if (!area)
  		return NULL;
  	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
  			size, vmap_pfn_apply, &data)) {
  		free_vm_area(area);
  		return NULL;
  	}

  	/*
  	 * The PTEs were written directly by vmap_pfn_apply(), so the cache
  	 * flush that normally follows installing a kernel mapping has to be
  	 * issued here for the new range.
  	 */
  	flush_cache_vmap((unsigned long)area->addr,
  			 (unsigned long)area->addr + size);

  	return area->addr;
  }
  EXPORT_SYMBOL_GPL(vmap_pfn);
  #endif /* CONFIG_VMAP_PFN */
e31d9eb5c   Adrian Bunk   make __vmalloc_ar...
2361
  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
3722e13cf   Wanpeng Li   mm/vmalloc: don't...
2362
  				 pgprot_t prot, int node)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2363
  {
930f036b4   David Rientjes   mm, vmalloc: cons...
2364
  	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
f255935b9   Christoph Hellwig   mm: cleanup the g...
2365
2366
2367
  	unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
  	unsigned int array_size = nr_pages * sizeof(struct page *), i;
  	struct page **pages;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2368

f255935b9   Christoph Hellwig   mm: cleanup the g...
2369
2370
2371
  	gfp_mask |= __GFP_NOWARN;
  	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
  		gfp_mask |= __GFP_HIGHMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2372

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2373
  	/* Please note that the recursion is strictly bounded. */
8757d5fa6   Jan Kiszka   [PATCH] mm: fix o...
2374
  	if (array_size > PAGE_SIZE) {
f255935b9   Christoph Hellwig   mm: cleanup the g...
2375
2376
  		pages = __vmalloc_node(array_size, 1, nested_gfp, node,
  					area->caller);
286e1ea3a   Andrew Morton   [PATCH] vmalloc()...
2377
  	} else {
976d6dfbb   Jan Beulich   vmalloc(): adjust...
2378
  		pages = kmalloc_node(array_size, nested_gfp, node);
286e1ea3a   Andrew Morton   [PATCH] vmalloc()...
2379
  	}
7ea362427   Austin Kim   mm/vmalloc.c: mov...
2380
2381
  
  	if (!pages) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2382
2383
2384
2385
  		remove_vm_area(area->addr);
  		kfree(area);
  		return NULL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2386

7ea362427   Austin Kim   mm/vmalloc.c: mov...
2387
2388
  	area->pages = pages;
  	area->nr_pages = nr_pages;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2389
  	for (i = 0; i < area->nr_pages; i++) {
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2390
  		struct page *page;
4b90951c0   Jianguo Wu   mm/vmalloc: use N...
2391
  		if (node == NUMA_NO_NODE)
f255935b9   Christoph Hellwig   mm: cleanup the g...
2392
  			page = alloc_page(gfp_mask);
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2393
  		else
f255935b9   Christoph Hellwig   mm: cleanup the g...
2394
  			page = alloc_pages_node(node, gfp_mask, 0);
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2395
2396
  
  		if (unlikely(!page)) {
82afbc32f   Hui Su   mm/vmalloc.c: upd...
2397
  			/* Successfully allocated i pages, free them in __vfree() */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2398
  			area->nr_pages = i;
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2399
  			atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2400
2401
  			goto fail;
  		}
bf53d6f8f   Christoph Lameter   vmalloc: clean up...
2402
  		area->pages[i] = page;
dcf61ff06   Liu Xiang   mm/vmalloc.c: rem...
2403
  		if (gfpflags_allow_blocking(gfp_mask))
660654f90   Eric Dumazet   mm/vmalloc.c: add...
2404
  			cond_resched();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2405
  	}
97105f0ab   Roman Gushchin   mm: vmalloc: show...
2406
  	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2407

ed1f324c5   Christoph Hellwig   mm: remove map_vm...
2408
2409
  	if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
  			prot, pages) < 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2410
  		goto fail;
ed1f324c5   Christoph Hellwig   mm: remove map_vm...
2411

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2412
2413
2414
  	return area->addr;
  
  fail:
a8e99259e   Michal Hocko   mm, page_alloc: w...
2415
  	warn_alloc(gfp_mask, NULL,
7877cdcc3   Michal Hocko   mm: consolidate w...
2416
  			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
22943ab11   Dave Hansen   mm: print vmalloc...
2417
  			  (area->nr_pages*PAGE_SIZE), area->size);
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2418
  	__vfree(area->addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2419
2420
2421
2422
  	return NULL;
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
   * __vmalloc_node_range - allocate virtually contiguous memory
   * @size:		  allocation size
   * @align:		  desired alignment
   * @start:		  vm area range start
   * @end:		  vm area range end
   * @gfp_mask:		  flags for the page level allocator
   * @prot:		  protection mask for the allocated pages
   * @vm_flags:		  additional vm area flags (e.g. %VM_NO_GUARD)
   * @node:		  node to use for allocation or NUMA_NO_NODE
   * @caller:		  caller's return address
   *
   * Allocate enough pages to cover @size from the page level
   * allocator with @gfp_mask flags.  Map them into contiguous
   * kernel virtual space, using a pagetable protection of @prot.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2437
2438
   *
   * Return: the address of the area or %NULL on failure
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2439
   */
d0a21265d   David Rientjes   mm: unify module_...
2440
2441
  void *__vmalloc_node_range(unsigned long size, unsigned long align,
  			unsigned long start, unsigned long end, gfp_t gfp_mask,
cb9e3c292   Andrey Ryabinin   mm: vmalloc: pass...
2442
2443
  			pgprot_t prot, unsigned long vm_flags, int node,
  			const void *caller)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2444
2445
  {
  	struct vm_struct *area;
89219d37a   Catalin Marinas   kmemleak: Add the...
2446
2447
  	void *addr;
  	unsigned long real_size = size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2448
2449
  
  	size = PAGE_ALIGN(size);
ca79b0c21   Arun KS   mm: convert total...
2450
  	if (!size || (size >> PAGE_SHIFT) > totalram_pages())
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2451
  		goto fail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2452

d98c9e83b   Andrey Ryabinin   kasan: fix crashe...
2453
  	area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
cb9e3c292   Andrey Ryabinin   mm: vmalloc: pass...
2454
  				vm_flags, start, end, node, gfp_mask, caller);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2455
  	if (!area)
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2456
  		goto fail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2457

3722e13cf   Wanpeng Li   mm/vmalloc: don't...
2458
  	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
1368edf06   Mel Gorman   mm: vmalloc: chec...
2459
  	if (!addr)
b82225f3f   Wanpeng Li   revert mm/vmalloc...
2460
  		return NULL;
89219d37a   Catalin Marinas   kmemleak: Add the...
2461
2462
  
  	/*
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
2463
2464
  	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
  	 * flag. It means that vm_struct is not fully initialized.
4341fa454   Joonsoo Kim   mm, vmalloc: remo...
2465
  	 * Now, it is fully initialized, so remove this flag here.
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2466
  	 */
20fc02b47   Zhang Yanfei   mm/vmalloc.c: ren...
2467
  	clear_vm_uninitialized_flag(area);
f5252e009   Mitsuo Hayasaka   mm: avoid null po...
2468

94f4a1618   Catalin Marinas   mm: kmemleak: tre...
2469
  	kmemleak_vmalloc(area, size, gfp_mask);
89219d37a   Catalin Marinas   kmemleak: Add the...
2470
2471
  
  	return addr;
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2472
2473
  
  fail:
a8e99259e   Michal Hocko   mm, page_alloc: w...
2474
  	warn_alloc(gfp_mask, NULL,
7877cdcc3   Michal Hocko   mm: consolidate w...
2475
  			  "vmalloc: allocation failure: %lu bytes", real_size);
de7d2b567   Joe Perches   mm/vmalloc.c: rep...
2476
  	return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2477
  }
d0a21265d   David Rientjes   mm: unify module_...
2478
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2479
2480
2481
2482
   * __vmalloc_node - allocate virtually contiguous memory
   * @size:	    allocation size
   * @align:	    desired alignment
   * @gfp_mask:	    flags for the page level allocator
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2483
2484
   * @node:	    node to use for allocation or NUMA_NO_NODE
   * @caller:	    caller's return address
a7c3e901a   Michal Hocko   mm: introduce kv[...
2485
   *
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2486
2487
   * Allocate enough pages to cover @size from the page level allocator with
   * @gfp_mask flags.  Map them into contiguous kernel virtual space.
a7c3e901a   Michal Hocko   mm: introduce kv[...
2488
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2489
2490
   * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
   * and __GFP_NOFAIL are not supported
a7c3e901a   Michal Hocko   mm: introduce kv[...
2491
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2492
2493
   * Any use of gfp flags outside of GFP_KERNEL should be consulted
   * with mm people.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2494
2495
   *
   * Return: pointer to the allocated memory or %NULL on error
d0a21265d   David Rientjes   mm: unify module_...
2496
   */
2b9059489   Christoph Hellwig   mm: remove __vmal...
2497
  void *__vmalloc_node(unsigned long size, unsigned long align,
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2498
  			    gfp_t gfp_mask, int node, const void *caller)
d0a21265d   David Rientjes   mm: unify module_...
2499
2500
  {
  	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2501
  				gfp_mask, PAGE_KERNEL, 0, node, caller);
d0a21265d   David Rientjes   mm: unify module_...
2502
  }
c3f896dcf   Christoph Hellwig   mm: switch the te...
2503
2504
2505
2506
2507
2508
2509
2510
  /*
   * This is only for performance analysis of vmalloc and stress purpose.
   * It is required by vmalloc test module, therefore do not use it other
   * than that.
   */
  #ifdef CONFIG_TEST_VMALLOC_MODULE
  EXPORT_SYMBOL_GPL(__vmalloc_node);
  #endif
d0a21265d   David Rientjes   mm: unify module_...
2511

88dca4ca5   Christoph Hellwig   mm: remove the pg...
2512
  /**
   * __vmalloc - allocate virtually contiguous memory with caller-chosen gfp flags
   * @size:	allocation size
   * @gfp_mask:	flags for the page level allocator
   *
   * Forwards to __vmalloc_node() with an alignment of 1, NUMA_NO_NODE and
   * the return address of this call as the recorded caller.
   *
   * Return: whatever __vmalloc_node() returns
   */
  void *__vmalloc(unsigned long size, gfp_t gfp_mask)
  {
  	const void *caller = __builtin_return_address(0);

  	return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE, caller);
  }
  EXPORT_SYMBOL(__vmalloc);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2520
2521
2522
2523
2524
   * vmalloc - allocate virtually contiguous memory
   * @size:    allocation size
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2525
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2526
2527
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2528
2529
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2530
2531
2532
   */
  void *vmalloc(unsigned long size)
  {
4d39d7285   Christoph Hellwig   mm: remove both i...
2533
2534
  	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
  				__builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2535
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2536
  EXPORT_SYMBOL(vmalloc);
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2537
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2538
2539
2540
2541
2542
2543
2544
2545
2546
   * vzalloc - allocate virtually contiguous memory with zero fill
   * @size:    allocation size
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   * The memory allocated is set to zero.
   *
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2547
2548
   *
   * Return: pointer to the allocated memory or %NULL on error
e1ca7788d   Dave Young   mm: add vzalloc()...
2549
2550
2551
   */
  void *vzalloc(unsigned long size)
  {
4d39d7285   Christoph Hellwig   mm: remove both i...
2552
2553
  	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
  				__builtin_return_address(0));
e1ca7788d   Dave Young   mm: add vzalloc()...
2554
2555
2556
2557
  }
  EXPORT_SYMBOL(vzalloc);
  
  /**
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2558
2559
   * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
   * @size: allocation size
833423143   Nick Piggin   [PATCH] mm: intro...
2560
   *
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2561
2562
   * The resulting memory area is zeroed so it can be mapped to userspace
   * without leaking data.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2563
2564
   *
   * Return: pointer to the allocated memory or %NULL on error
833423143   Nick Piggin   [PATCH] mm: intro...
2565
2566
2567
   */
  void *vmalloc_user(unsigned long size)
  {
bc84c5352   Roman Penyaev   mm/vmalloc: pass ...
2568
2569
2570
2571
  	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
  				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
  				    VM_USERMAP, NUMA_NO_NODE,
  				    __builtin_return_address(0));
833423143   Nick Piggin   [PATCH] mm: intro...
2572
2573
2574
2575
  }
  EXPORT_SYMBOL(vmalloc_user);
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2576
2577
2578
   * vmalloc_node - allocate memory on a specific node
   * @size:	  allocation size
   * @node:	  numa node
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2579
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2580
2581
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2582
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2583
2584
   * For tight control over page level allocator and protection flags
   * use __vmalloc() instead.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2585
2586
   *
   * Return: pointer to the allocated memory or %NULL on error
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2587
2588
2589
   */
  void *vmalloc_node(unsigned long size, int node)
  {
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2590
2591
  	return __vmalloc_node(size, 1, GFP_KERNEL, node,
  			__builtin_return_address(0));
930fc45a4   Christoph Lameter   [PATCH] vmalloc_node
2592
2593
  }
  EXPORT_SYMBOL(vmalloc_node);
e1ca7788d   Dave Young   mm: add vzalloc()...
2594
2595
2596
2597
2598
2599
2600
2601
2602
  /**
   * vzalloc_node - allocate memory on a specific node with zero fill
   * @size:	allocation size
   * @node:	numa node
   *
   * Allocate enough pages to cover @size from the page level
   * allocator and map them into contiguous kernel virtual space.
   * The memory allocated is set to zero.
   *
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2603
   * Return: pointer to the allocated memory or %NULL on error
e1ca7788d   Dave Young   mm: add vzalloc()...
2604
2605
2606
   */
  void *vzalloc_node(unsigned long size, int node)
  {
4d39d7285   Christoph Hellwig   mm: remove both i...
2607
2608
  	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
  				__builtin_return_address(0));
e1ca7788d   Dave Young   mm: add vzalloc()...
2609
2610
  }
  EXPORT_SYMBOL(vzalloc_node);
0d08e0d3a   Andi Kleen   [PATCH] x86-64: F...
2611
  /*
   * GFP_VMALLOC32 - gfp flags used by the vmalloc_32*() allocators.
   *
   * Every variant is fully parenthesized so that the macro behaves as a
   * single value wherever it is used (e.g. next to '&', '~' or '==').
   */
  #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
  #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
  #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
  #define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
  #else
  /*
   * 64b systems should always have either DMA or DMA32 zones. For others
   * GFP_DMA32 should do the right thing and use the normal zone.
   */
  #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
  #endif
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2622
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2623
2624
   * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
   * @size:	allocation size
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2625
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2626
2627
   * Allocate enough 32bit PA addressable pages to cover @size from the
   * page level allocator and map them into contiguous kernel virtual space.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2628
2629
   *
   * Return: pointer to the allocated memory or %NULL on error
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2630
2631
2632
   */
  void *vmalloc_32(unsigned long size)
  {
f38fcb9c1   Christoph Hellwig   mm: remove the pr...
2633
2634
  	return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
  			__builtin_return_address(0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2635
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2636
  EXPORT_SYMBOL(vmalloc_32);
833423143   Nick Piggin   [PATCH] mm: intro...
2637
  /**
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2638
   * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2639
   * @size:	     allocation size
ead04089b   Rolf Eike Beer   [PATCH] Fix kerne...
2640
2641
2642
   *
   * The resulting memory area is 32bit addressable and zeroed so it can be
   * mapped to userspace without leaking data.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2643
2644
   *
   * Return: pointer to the allocated memory or %NULL on error
833423143   Nick Piggin   [PATCH] mm: intro...
2645
2646
2647
   */
  void *vmalloc_32_user(unsigned long size)
  {
bc84c5352   Roman Penyaev   mm/vmalloc: pass ...
2648
2649
2650
2651
  	return __vmalloc_node_range(size, SHMLBA,  VMALLOC_START, VMALLOC_END,
  				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
  				    VM_USERMAP, NUMA_NO_NODE,
  				    __builtin_return_address(0));
833423143   Nick Piggin   [PATCH] mm: intro...
2652
2653
  }
  EXPORT_SYMBOL(vmalloc_32_user);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
  /*
   * small helper routine , copy contents to buf from addr.
   * If the page is not present, fill zero.
   */
  
  static int aligned_vread(char *buf, char *addr, unsigned long count)
  {
  	struct page *p;
  	int copied = 0;
  
  	while (count) {
  		unsigned long offset, length;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
2666
  		offset = offset_in_page(addr);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
  		length = PAGE_SIZE - offset;
  		if (length > count)
  			length = count;
  		p = vmalloc_to_page(addr);
  		/*
  		 * To do safe access to this _mapped_ area, we need
  		 * lock. But adding lock here means that we need to add
  		 * overhead of vmalloc()/vfree() calles for this _debug_
  		 * interface, rarely used. Instead of that, we'll use
  		 * kmap() and get small overhead in this access function.
  		 */
  		if (p) {
  			/*
  			 * we can expect USER0 is not used (see vread/vwrite's
  			 * function description)
  			 */
9b04c5fec   Cong Wang   mm: remove the se...
2683
  			void *map = kmap_atomic(p);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2684
  			memcpy(buf, map + offset, length);
9b04c5fec   Cong Wang   mm: remove the se...
2685
  			kunmap_atomic(map);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
  		} else
  			memset(buf, 0, length);
  
  		addr += length;
  		buf += length;
  		copied += length;
  		count -= length;
  	}
  	return copied;
  }
  
  static int aligned_vwrite(char *buf, char *addr, unsigned long count)
  {
  	struct page *p;
  	int copied = 0;
  
  	while (count) {
  		unsigned long offset, length;
891c49abf   Alexander Kuleshov   mm/vmalloc: use o...
2704
  		offset = offset_in_page(addr);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
  		length = PAGE_SIZE - offset;
  		if (length > count)
  			length = count;
  		p = vmalloc_to_page(addr);
  		/*
  		 * To do safe access to this _mapped_ area, we need
  		 * lock. But adding lock here means that we need to add
  		 * overhead of vmalloc()/vfree() calles for this _debug_
  		 * interface, rarely used. Instead of that, we'll use
  		 * kmap() and get small overhead in this access function.
  		 */
  		if (p) {
  			/*
  			 * we can expect USER0 is not used (see vread/vwrite's
  			 * function description)
  			 */
9b04c5fec   Cong Wang   mm: remove the se...
2721
  			void *map = kmap_atomic(p);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2722
  			memcpy(map + offset, buf, length);
9b04c5fec   Cong Wang   mm: remove the se...
2723
  			kunmap_atomic(map);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
  		}
  		addr += length;
  		buf += length;
  		copied += length;
  		count -= length;
  	}
  	return copied;
  }
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2734
2735
2736
2737
2738
   * vread() - read vmalloc area in a safe way.
   * @buf:     buffer for reading data
   * @addr:    vm address.
   * @count:   number of bytes to be read.
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
   * This function checks that addr is a valid vmalloc'ed area, and
   * copy data from that area to a given buffer. If the given memory range
   * of [addr...addr+count) includes some valid address, data is copied to
   * proper area of @buf. If there are memory holes, they'll be zero-filled.
   * IOREMAP area is treated as memory hole and no copy is done.
   *
   * If [addr...addr+count) doesn't includes any intersects with alive
   * vm_struct area, returns 0. @buf should be kernel's buffer.
   *
   * Note: In usual ops, vread() is never necessary because the caller
   * should know vmalloc() area is valid and can use memcpy().
   * This is for routines which have to access vmalloc area without
d9009d67f   Geert Uytterhoeven   mm/vmalloc.c: spe...
2751
   * any information, as /dev/kmem.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2752
2753
2754
2755
   *
   * Return: number of bytes for which addr and buf should be increased
   * (same number as @count) or %0 if [addr...addr+count) doesn't
   * include any intersection with valid vmalloc area
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2756
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2757
2758
  long vread(char *buf, char *addr, unsigned long count)
  {
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2759
2760
  	struct vmap_area *va;
  	struct vm_struct *vm;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2761
  	char *vaddr, *buf_start = buf;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2762
  	unsigned long buflen = count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2763
2764
2765
2766
2767
  	unsigned long n;
  
  	/* Don't allow overflow */
  	if ((unsigned long) addr + count < count)
  		count = -(unsigned long) addr;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2768
2769
2770
2771
  	spin_lock(&vmap_area_lock);
  	list_for_each_entry(va, &vmap_area_list, list) {
  		if (!count)
  			break;
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2772
  		if (!va->vm)
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2773
2774
2775
2776
  			continue;
  
  		vm = va->vm;
  		vaddr = (char *) vm->addr;
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2777
  		if (addr >= vaddr + get_vm_area_size(vm))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2778
2779
2780
2781
2782
2783
2784
2785
2786
  			continue;
  		while (addr < vaddr) {
  			if (count == 0)
  				goto finished;
  			*buf = '\0';
  			buf++;
  			addr++;
  			count--;
  		}
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2787
  		n = vaddr + get_vm_area_size(vm) - addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2788
2789
  		if (n > count)
  			n = count;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2790
  		if (!(vm->flags & VM_IOREMAP))
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2791
2792
2793
2794
2795
2796
  			aligned_vread(buf, addr, n);
  		else /* IOREMAP area is treated as memory hole */
  			memset(buf, 0, n);
  		buf += n;
  		addr += n;
  		count -= n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2797
2798
  	}
  finished:
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2799
  	spin_unlock(&vmap_area_lock);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2800
2801
2802
2803
2804
2805
2806
2807
  
  	if (buf == buf_start)
  		return 0;
  	/* zero-fill memory holes */
  	if (buf != buf_start + buflen)
  		memset(buf, 0, buflen - (buf - buf_start));
  
  	return buflen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2808
  }
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2809
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2810
2811
2812
2813
2814
   * vwrite() - write vmalloc area in a safe way.
   * @buf:      buffer for source data
   * @addr:     vm address.
   * @count:    number of bytes to be read.
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
   * This function checks that addr is a valid vmalloc'ed area, and
   * copy data from a buffer to the given addr. If specified range of
   * [addr...addr+count) includes some valid address, data is copied from
   * proper area of @buf. If there are memory holes, no copy to hole.
   * IOREMAP area is treated as memory hole and no copy is done.
   *
   * If [addr...addr+count) doesn't includes any intersects with alive
   * vm_struct area, returns 0. @buf should be kernel's buffer.
   *
   * Note: In usual ops, vwrite() is never necessary because the caller
   * should know vmalloc() area is valid and can use memcpy().
   * This is for routines which have to access vmalloc area without
d9009d67f   Geert Uytterhoeven   mm/vmalloc.c: spe...
2827
   * any information, as /dev/kmem.
a862f68a8   Mike Rapoport   docs/core-api/mm:...
2828
2829
2830
2831
   *
   * Return: number of bytes for which addr and buf should be
   * increased (same number as @count) or %0 if [addr...addr+count)
   * doesn't include any intersection with valid vmalloc area
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2832
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2833
2834
  long vwrite(char *buf, char *addr, unsigned long count)
  {
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2835
2836
  	struct vmap_area *va;
  	struct vm_struct *vm;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2837
2838
2839
  	char *vaddr;
  	unsigned long n, buflen;
  	int copied = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2840
2841
2842
2843
  
  	/* Don't allow overflow */
  	if ((unsigned long) addr + count < count)
  		count = -(unsigned long) addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2844
  	buflen = count;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2845

e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2846
2847
2848
2849
  	spin_lock(&vmap_area_lock);
  	list_for_each_entry(va, &vmap_area_list, list) {
  		if (!count)
  			break;
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
2850
  		if (!va->vm)
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2851
2852
2853
2854
  			continue;
  
  		vm = va->vm;
  		vaddr = (char *) vm->addr;
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2855
  		if (addr >= vaddr + get_vm_area_size(vm))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2856
2857
2858
2859
2860
2861
2862
2863
  			continue;
  		while (addr < vaddr) {
  			if (count == 0)
  				goto finished;
  			buf++;
  			addr++;
  			count--;
  		}
762216ab4   Wanpeng Li   mm/vmalloc: use w...
2864
  		n = vaddr + get_vm_area_size(vm) - addr;
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2865
2866
  		if (n > count)
  			n = count;
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2867
  		if (!(vm->flags & VM_IOREMAP)) {
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2868
2869
2870
2871
2872
2873
  			aligned_vwrite(buf, addr, n);
  			copied++;
  		}
  		buf += n;
  		addr += n;
  		count -= n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2874
2875
  	}
  finished:
e81ce85f9   Joonsoo Kim   mm, vmalloc: iter...
2876
  	spin_unlock(&vmap_area_lock);
d0107eb07   KAMEZAWA Hiroyuki   kcore: fix vread/...
2877
2878
2879
  	if (!copied)
  		return 0;
  	return buflen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2880
  }
833423143   Nick Piggin   [PATCH] mm: intro...
2881
2882
  
  /**
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2883
2884
2885
2886
   * remap_vmalloc_range_partial - map vmalloc pages to userspace
   * @vma:		vma to cover
   * @uaddr:		target user address to start at
   * @kaddr:		virtual address of vmalloc kernel memory
bdebd6a28   Jann Horn   vmalloc: fix rema...
2887
   * @pgoff:		offset from @kaddr to start at
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2888
   * @size:		size of map area
7682486b3   Randy Dunlap   mm: fix various k...
2889
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2890
   * Returns:	0 for success, -Exxx on failure
833423143   Nick Piggin   [PATCH] mm: intro...
2891
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2892
2893
2894
2895
   * This function checks that @kaddr is a valid vmalloc'ed area,
   * and that it is big enough to cover the range starting at
   * @uaddr in @vma. Will return failure if that criteria isn't
   * met.
833423143   Nick Piggin   [PATCH] mm: intro...
2896
   *
92eac1681   Mike Rapoport   docs/mm: vmalloc:...
2897
   * Similar to remap_pfn_range() (see mm/memory.c)
833423143   Nick Piggin   [PATCH] mm: intro...
2898
   */
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2899
  int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
bdebd6a28   Jann Horn   vmalloc: fix rema...
2900
2901
  				void *kaddr, unsigned long pgoff,
  				unsigned long size)
833423143   Nick Piggin   [PATCH] mm: intro...
2902
2903
  {
  	struct vm_struct *area;
bdebd6a28   Jann Horn   vmalloc: fix rema...
2904
2905
2906
2907
2908
  	unsigned long off;
  	unsigned long end_index;
  
  	if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
2909

e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2910
2911
2912
  	size = PAGE_ALIGN(size);
  
  	if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
833423143   Nick Piggin   [PATCH] mm: intro...
2913
  		return -EINVAL;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2914
  	area = find_vm_area(kaddr);
833423143   Nick Piggin   [PATCH] mm: intro...
2915
  	if (!area)
db64fe022   Nick Piggin   mm: rewrite vmap ...
2916
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
2917

fe9041c24   Christoph Hellwig   vmalloc: lift the...
2918
  	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
db64fe022   Nick Piggin   mm: rewrite vmap ...
2919
  		return -EINVAL;
833423143   Nick Piggin   [PATCH] mm: intro...
2920

bdebd6a28   Jann Horn   vmalloc: fix rema...
2921
2922
  	if (check_add_overflow(size, off, &end_index) ||
  	    end_index > get_vm_area_size(area))
db64fe022   Nick Piggin   mm: rewrite vmap ...
2923
  		return -EINVAL;
bdebd6a28   Jann Horn   vmalloc: fix rema...
2924
  	kaddr += off;
833423143   Nick Piggin   [PATCH] mm: intro...
2925

833423143   Nick Piggin   [PATCH] mm: intro...
2926
  	do {
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2927
  		struct page *page = vmalloc_to_page(kaddr);
db64fe022   Nick Piggin   mm: rewrite vmap ...
2928
  		int ret;
833423143   Nick Piggin   [PATCH] mm: intro...
2929
2930
2931
2932
2933
  		ret = vm_insert_page(vma, uaddr, page);
  		if (ret)
  			return ret;
  
  		uaddr += PAGE_SIZE;
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2934
2935
2936
  		kaddr += PAGE_SIZE;
  		size -= PAGE_SIZE;
  	} while (size > 0);
833423143   Nick Piggin   [PATCH] mm: intro...
2937

314e51b98   Konstantin Khlebnikov   mm: kill vma flag...
2938
  	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
833423143   Nick Piggin   [PATCH] mm: intro...
2939

db64fe022   Nick Piggin   mm: rewrite vmap ...
2940
  	return 0;
833423143   Nick Piggin   [PATCH] mm: intro...
2941
  }
e69e9d4ae   HATAYAMA Daisuke   vmalloc: introduc...
2942
2943
2944
  EXPORT_SYMBOL(remap_vmalloc_range_partial);
  
  /**
   * remap_vmalloc_range - map vmalloc pages to userspace
   * @vma:		vma to cover (map full range of vma)
   * @addr:		vmalloc memory
   * @pgoff:		number of pages into addr before first page to map
   *
   * Returns:	0 for success, -Exxx on failure
   *
   * This function checks that addr is a valid vmalloc'ed area, and
   * that it is big enough to cover the vma. Will return failure if
   * that criteria isn't met. The work is done by
   * remap_vmalloc_range_partial(), called for the whole span
   * vma->vm_start..vma->vm_end.
   *
   * Similar to remap_pfn_range() (see mm/memory.c)
   */
  int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
  						unsigned long pgoff)
  {
  	unsigned long first = vma->vm_start;
  	unsigned long span = vma->vm_end - first;

  	return remap_vmalloc_range_partial(vma, first, addr, pgoff, span);
  }
  EXPORT_SYMBOL(remap_vmalloc_range);
5f4352fbf   Jeremy Fitzhardinge   Allocate and free...
2966
2967
2968
2969
2970
2971
2972
2973
  /*
   * free_vm_area - remove a vm area and free its descriptor
   * @area: the vm_struct to release
   *
   * Calls remove_vm_area() on @area->addr, BUGs if the descriptor handed
   * back is not @area itself, then kfree()s @area.
   */
  void free_vm_area(struct vm_struct *area)
  {
  	struct vm_struct *removed = remove_vm_area(area->addr);

  	BUG_ON(removed != area);
  	kfree(area);
  }
  EXPORT_SYMBOL_GPL(free_vm_area);
a10aa5798   Christoph Lameter   vmalloc: show vma...
2974

4f8b02b4e   Tejun Heo   vmalloc: pcpu_get...
2975
  #ifdef CONFIG_SMP
ca23e405e   Tejun Heo   vmalloc: implemen...
2976
2977
  static struct vmap_area *node_to_va(struct rb_node *n)
  {
4583e7731   Geliang Tang   mm/vmalloc.c: use...
2978
  	return rb_entry_safe(n, struct vmap_area, rb_node);
ca23e405e   Tejun Heo   vmalloc: implemen...
2979
2980
2981
  }
  
  /**
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
2982
2983
   * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
   * @addr: target address
ca23e405e   Tejun Heo   vmalloc: implemen...
2984
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
2985
2986
2987
2988
   * Returns: vmap_area if it is found. If there is no such area
   *   the first highest(reverse order) vmap_area is returned
   *   i.e. va->va_start < addr && va->va_end < addr or NULL
   *   if there are no any areas before @addr.
ca23e405e   Tejun Heo   vmalloc: implemen...
2989
   */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
2990
2991
  static struct vmap_area *
  pvm_find_va_enclose_addr(unsigned long addr)
ca23e405e   Tejun Heo   vmalloc: implemen...
2992
  {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
2993
2994
2995
2996
2997
  	struct vmap_area *va, *tmp;
  	struct rb_node *n;
  
  	n = free_vmap_area_root.rb_node;
  	va = NULL;
ca23e405e   Tejun Heo   vmalloc: implemen...
2998
2999
  
  	while (n) {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3000
3001
3002
3003
3004
  		tmp = rb_entry(n, struct vmap_area, rb_node);
  		if (tmp->va_start <= addr) {
  			va = tmp;
  			if (tmp->va_end >= addr)
  				break;
ca23e405e   Tejun Heo   vmalloc: implemen...
3005
  			n = n->rb_right;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3006
3007
3008
  		} else {
  			n = n->rb_left;
  		}
ca23e405e   Tejun Heo   vmalloc: implemen...
3009
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3010
  	return va;
ca23e405e   Tejun Heo   vmalloc: implemen...
3011
3012
3013
  }
  
  /**
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3014
3015
3016
3017
3018
   * pvm_determine_end_from_reverse - find the highest aligned address
   * of free block below VMALLOC_END
   * @va:
   *   in - the VA we start the search(reverse order);
   *   out - the VA with the highest aligned end address.
ca23e405e   Tejun Heo   vmalloc: implemen...
3019
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3020
   * Returns: determined end address within vmap_area
ca23e405e   Tejun Heo   vmalloc: implemen...
3021
   */
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3022
3023
  static unsigned long
  pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
ca23e405e   Tejun Heo   vmalloc: implemen...
3024
  {
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3025
  	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
ca23e405e   Tejun Heo   vmalloc: implemen...
3026
  	unsigned long addr;
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3027
3028
3029
3030
3031
3032
3033
  	if (likely(*va)) {
  		list_for_each_entry_from_reverse((*va),
  				&free_vmap_area_list, list) {
  			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
  			if ((*va)->va_start < addr)
  				return addr;
  		}
ca23e405e   Tejun Heo   vmalloc: implemen...
3034
  	}
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3035
  	return 0;
ca23e405e   Tejun Heo   vmalloc: implemen...
3036
3037
3038
3039
3040
3041
3042
3043
  }
  
  /**
   * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
   * @offsets: array containing offset of each area
   * @sizes: array containing size of each area
   * @nr_vms: the number of areas to allocate
   * @align: alignment, all entries in @offsets and @sizes must be aligned to this
ca23e405e   Tejun Heo   vmalloc: implemen...
3044
3045
3046
3047
3048
3049
   *
   * Returns: kmalloc'd vm_struct pointer array pointing to allocated
   *	    vm_structs on success, %NULL on failure
   *
   * Percpu allocator wants to use congruent vm areas so that it can
   * maintain the offsets among percpu areas.  This function allocates
ec3f64fc9   David Rientjes   mm: remove gfp ma...
3050
3051
3052
3053
   * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
   * be scattered pretty far, distance between two areas easily going up
   * to gigabytes.  To avoid interacting with regular vmallocs, these
   * areas are allocated from top.
ca23e405e   Tejun Heo   vmalloc: implemen...
3054
   *
68ad4a330   Uladzislau Rezki (Sony)   mm/vmalloc.c: kee...
3055
3056
3057
3058
3059
3060
   * Despite its complicated look, this allocator is rather simple. It
   * does everything top-down and scans free blocks from the end looking
   * for matching base. While scanning, if any of the areas do not fit the
   * base address is pulled down to fit the area. Scanning is repeated till
   * all the areas fit and then all necessary data structures are inserted
   * and the result is returned.
ca23e405e   Tejun Heo   vmalloc: implemen...
3061
3062
3063
   */
  struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
  				     const size_t *sizes, int nr_vms,
  				     size_t align)
  {
  	/* vmalloc range bounds, both rounded inwards to @align */
  	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
  	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
  	struct vmap_area **vas, *va;
  	struct vm_struct **vms;
  	int area, area2, last_area, term_area;
  	unsigned long base, start, size, end, last_end, orig_start, orig_end;
  	bool purged = false;
  	enum fit_type type;

  	/* verify parameters and allocate data structures */
  	BUG_ON(offset_in_page(align) || !is_power_of_2(align));

  	/*
  	 * An empty (or negative) request has no area to anchor the scan
  	 * on: the code below would read offsets[0]/sizes[0] and evaluate
  	 * "% nr_vms".  Fail such a request up front instead.
  	 */
  	if (WARN_ON_ONCE(nr_vms <= 0))
  		return NULL;

  	for (last_area = 0, area = 0; area < nr_vms; area++) {
  		start = offsets[area];
  		end = start + sizes[area];

  		/* is everything aligned properly? */
  		BUG_ON(!IS_ALIGNED(offsets[area], align));
  		BUG_ON(!IS_ALIGNED(sizes[area], align));

  		/* detect the area with the highest address */
  		if (start > offsets[last_area])
  			last_area = area;

  		/* no two requested areas may overlap */
  		for (area2 = area + 1; area2 < nr_vms; area2++) {
  			unsigned long start2 = offsets[area2];
  			unsigned long end2 = start2 + sizes[area2];

  			BUG_ON(start2 < end && start < end2);
  		}
  	}
  	last_end = offsets[last_area] + sizes[last_area];

  	/* the highest area's end offset must fit in the aligned range */
  	if (vmalloc_end - vmalloc_start < last_end) {
  		WARN_ON(true);
  		return NULL;
  	}

  	vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
  	vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
  	if (!vas || !vms)
  		goto err_free2;

  	for (area = 0; area < nr_vms; area++) {
  		vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
  		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
  		if (!vas[area] || !vms[area])
  			goto err_free;
  	}
  retry:
  	spin_lock(&free_vmap_area_lock);

  	/* start scanning - we scan from the top, begin with the last area */
  	area = term_area = last_area;
  	start = offsets[area];
  	end = start + sizes[area];

  	va = pvm_find_va_enclose_addr(vmalloc_end);
  	base = pvm_determine_end_from_reverse(&va, align) - end;

  	while (true) {
  		/*
  		 * base might have underflowed, add last_end before
  		 * comparing.
  		 */
  		if (base + last_end < vmalloc_start + last_end)
  			goto overflow;

  		/*
  		 * Fitting base has not been found.
  		 */
  		if (va == NULL)
  			goto overflow;

  		/*
  		 * If required width exceeds current VA block, move
  		 * base downwards and then recheck.
  		 */
  		if (base + end > va->va_end) {
  			base = pvm_determine_end_from_reverse(&va, align) - end;
  			term_area = area;
  			continue;
  		}

  		/*
  		 * If this VA does not fit, move base downwards and recheck.
  		 */
  		if (base + start < va->va_start) {
  			va = node_to_va(rb_prev(&va->rb_node));
  			base = pvm_determine_end_from_reverse(&va, align) - end;
  			term_area = area;
  			continue;
  		}

  		/*
  		 * This area fits, move on to the previous one.  If
  		 * the previous one is the terminal one, we're done.
  		 */
  		area = (area + nr_vms - 1) % nr_vms;
  		if (area == term_area)
  			break;

  		start = offsets[area];
  		end = start + sizes[area];
  		va = pvm_find_va_enclose_addr(base + end);
  	}

  	/* we've found a fitting base, insert all va's */
  	for (area = 0; area < nr_vms; area++) {
  		int ret;

  		start = base + offsets[area];
  		size = sizes[area];

  		va = pvm_find_va_enclose_addr(start);
  		if (WARN_ON_ONCE(va == NULL))
  			/* It is a BUG(), but trigger recovery instead. */
  			goto recovery;

  		type = classify_va_fit_type(va, start, size);
  		if (WARN_ON_ONCE(type == NOTHING_FIT))
  			/* It is a BUG(), but trigger recovery instead. */
  			goto recovery;

  		ret = adjust_va_to_fit_type(va, start, size, type);
  		if (unlikely(ret))
  			goto recovery;

  		/* Allocated area. */
  		va = vas[area];
  		va->va_start = start;
  		va->va_end = start + size;
  	}

  	spin_unlock(&free_vmap_area_lock);

  	/* populate the kasan shadow space */
  	for (area = 0; area < nr_vms; area++) {
  		if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
  			goto err_free_shadow;

  		kasan_unpoison_vmalloc((void *)vas[area]->va_start,
  				       sizes[area]);
  	}

  	/* insert all vm's */
  	spin_lock(&vmap_area_lock);
  	for (area = 0; area < nr_vms; area++) {
  		insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);

  		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
  				 pcpu_get_vm_areas);
  	}
  	spin_unlock(&vmap_area_lock);

  	/* only the pointer array is dropped; the caller gets @vms */
  	kfree(vas);
  	return vms;

  recovery:
  	/*
  	 * Remove previously allocated areas. There is no
  	 * need in removing these areas from the busy tree,
  	 * because they are inserted only on the final step
  	 * and when pcpu_get_vm_areas() is successful.
  	 *
  	 * "area" is the index that failed, so only the entries
  	 * below it have been filled in and need to be handed back.
  	 */
  	while (area--) {
  		orig_start = vas[area]->va_start;
  		orig_end = vas[area]->va_end;
  		va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
  					    &free_vmap_area_list);
  		if (va)
  			kasan_release_vmalloc(orig_start, orig_end,
  				va->va_start, va->va_end);
  		/* handed to the free tree above; must not be freed again */
  		vas[area] = NULL;
  	}

  overflow:
  	spin_unlock(&free_vmap_area_lock);
  	/* purge lazily freed areas and retry, but only once */
  	if (!purged) {
  		purge_vmap_area_lazy();
  		purged = true;

  		/* Before "retry", check if we recover. */
  		for (area = 0; area < nr_vms; area++) {
  			if (vas[area])
  				continue;

  			vas[area] = kmem_cache_zalloc(
  				vmap_area_cachep, GFP_KERNEL);
  			if (!vas[area])
  				goto err_free;
  		}

  		goto retry;
  	}

  err_free:
  	for (area = 0; area < nr_vms; area++) {
  		if (vas[area])
  			kmem_cache_free(vmap_area_cachep, vas[area]);

  		kfree(vms[area]);
  	}
  err_free2:
  	kfree(vas);
  	kfree(vms);
  	return NULL;

  err_free_shadow:
  	spin_lock(&free_vmap_area_lock);
  	/*
  	 * We release all the vmalloc shadows, even the ones for regions that
  	 * hadn't been successfully added. This relies on kasan_release_vmalloc
  	 * being able to tolerate this case.
  	 */
  	for (area = 0; area < nr_vms; area++) {
  		orig_start = vas[area]->va_start;
  		orig_end = vas[area]->va_end;
  		va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
  					    &free_vmap_area_list);
  		if (va)
  			kasan_release_vmalloc(orig_start, orig_end,
  				va->va_start, va->va_end);
  		vas[area] = NULL;
  		kfree(vms[area]);
  	}
  	spin_unlock(&free_vmap_area_lock);
  	kfree(vas);
  	kfree(vms);
  	return NULL;
  }
  
  /**
   * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
   * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
   * @nr_vms: the number of allocated areas
   *
   * Passes each of the first @nr_vms entries of @vms to free_vm_area()
   * and then kfree()s the pointer array itself.
   */
  void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
  {
  	int idx = 0;

  	while (idx < nr_vms) {
  		free_vm_area(vms[idx]);
  		idx++;
  	}

  	kfree(vms);
  }
4f8b02b4e   Tejun Heo   vmalloc: pcpu_get...
3304
  #endif	/* CONFIG_SMP */
a10aa5798   Christoph Lameter   vmalloc: show vma...
3305
3306
3307
  
  #ifdef CONFIG_PROC_FS
  /*
   * seq_file ->start hook: take vmap_purge_lock, then vmap_area_lock, and
   * return whatever seq_list_start() yields for position *pos in
   * vmap_area_list.  Both locks are dropped again in s_stop().
   */
  static void *s_start(struct seq_file *m, loff_t *pos)
  	__acquires(&vmap_purge_lock)
  	__acquires(&vmap_area_lock)
  {
  	struct list_head *cursor;

  	mutex_lock(&vmap_purge_lock);
  	spin_lock(&vmap_area_lock);

  	cursor = seq_list_start(&vmap_area_list, *pos);
  	return cursor;
  }
  
  /*
   * seq_file ->next hook: delegate to seq_list_next() to step from @p to
   * the following entry of vmap_area_list, updating *pos.
   */
  static void *s_next(struct seq_file *m, void *p, loff_t *pos)
  {
  	struct list_head *following;

  	following = seq_list_next(p, &vmap_area_list, pos);
  	return following;
  }
  
  static void s_stop(struct seq_file *m, void *p)
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3323
  	__releases(&vmap_area_lock)
4a9d8b078   Waiman Long   mm/vmalloc: Fix u...
3324
  	__releases(&vmap_purge_lock)
a10aa5798   Christoph Lameter   vmalloc: show vma...
3325
  {
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3326
  	spin_unlock(&vmap_area_lock);
4a9d8b078   Waiman Long   mm/vmalloc: Fix u...
3327
  	mutex_unlock(&vmap_purge_lock);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3328
  }
a47a126ad   Eric Dumazet   vmallocinfo: add ...
3329
3330
  static void show_numa_info(struct seq_file *m, struct vm_struct *v)
  {
e5adfffc8   Kirill A. Shutemov   mm: use IS_ENABLE...
3331
  	if (IS_ENABLED(CONFIG_NUMA)) {
a47a126ad   Eric Dumazet   vmallocinfo: add ...
3332
3333
3334
3335
  		unsigned int nr, *counters = m->private;
  
  		if (!counters)
  			return;
af12346cd   Wanpeng Li   mm/vmalloc: rever...
3336
3337
  		if (v->flags & VM_UNINITIALIZED)
  			return;
7e5b528b4   Dmitry Vyukov   mm/vmalloc.c: fix...
3338
3339
  		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
  		smp_rmb();
af12346cd   Wanpeng Li   mm/vmalloc: rever...
3340

a47a126ad   Eric Dumazet   vmallocinfo: add ...
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
  		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
  
  		for (nr = 0; nr < v->nr_pages; nr++)
  			counters[page_to_nid(v->pages[nr])]++;
  
  		for_each_node_state(nr, N_HIGH_MEMORY)
  			if (counters[nr])
  				seq_printf(m, " N%u=%u", nr, counters[nr]);
  	}
  }
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
  /*
   * Emit one "unpurged vm_area" line (start, end, size) for each
   * vmap_area currently linked on vmap_purge_list.  The list head is
   * sampled once with READ_ONCE(); an empty list prints nothing.
   */
  static void show_purge_info(struct seq_file *m)
  {
  	struct llist_node *head;
  	struct vmap_area *va;

  	head = READ_ONCE(vmap_purge_list.first);
  	if (head == NULL)
  		return;

  	llist_for_each_entry(va, head, purge_list) {
  		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
  			(void *)va->va_start, (void *)va->va_end,
  			va->va_end - va->va_start);
  	}
  }
a10aa5798   Christoph Lameter   vmalloc: show vma...
3367
3368
  static int s_show(struct seq_file *m, void *p)
  {
3f5000693   zijun_hu   mm/vmalloc.c: sim...
3369
  	struct vmap_area *va;
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3370
  	struct vm_struct *v;
3f5000693   zijun_hu   mm/vmalloc.c: sim...
3371
  	va = list_entry(p, struct vmap_area, list);
c2ce8c142   Wanpeng Li   mm/vmalloc: fix s...
3372
  	/*
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
3373
3374
  	 * s_show can encounter race with remove_vm_area, !vm on behalf
  	 * of vmap area is being tear down or vm_map_ram allocation.
c2ce8c142   Wanpeng Li   mm/vmalloc: fix s...
3375
  	 */
688fcbfc0   Pengfei Li   mm/vmalloc: modif...
3376
  	if (!va->vm) {
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3377
3378
  		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram
  ",
78c72746f   Yisheng Xie   vmalloc: show laz...
3379
  			(void *)va->va_start, (void *)va->va_end,
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3380
  			va->va_end - va->va_start);
78c72746f   Yisheng Xie   vmalloc: show laz...
3381

d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3382
  		return 0;
78c72746f   Yisheng Xie   vmalloc: show laz...
3383
  	}
d4033afdf   Joonsoo Kim   mm, vmalloc: iter...
3384
3385
  
  	v = va->vm;
a10aa5798   Christoph Lameter   vmalloc: show vma...
3386

45ec16908   Kees Cook   mm: use %pK for /...
3387
  	seq_printf(m, "0x%pK-0x%pK %7ld",
a10aa5798   Christoph Lameter   vmalloc: show vma...
3388
  		v->addr, v->addr + v->size, v->size);
62c70bce8   Joe Perches   mm: convert sprin...
3389
3390
  	if (v->caller)
  		seq_printf(m, " %pS", v->caller);
230169693   Christoph Lameter   vmallocinfo: add ...
3391

a10aa5798   Christoph Lameter   vmalloc: show vma...
3392
3393
3394
3395
  	if (v->nr_pages)
  		seq_printf(m, " pages=%d", v->nr_pages);
  
  	if (v->phys_addr)
199eaa05a   Miles Chen   mm: cleanups for ...
3396
  		seq_printf(m, " phys=%pa", &v->phys_addr);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3397
3398
  
  	if (v->flags & VM_IOREMAP)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3399
  		seq_puts(m, " ioremap");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3400
3401
  
  	if (v->flags & VM_ALLOC)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3402
  		seq_puts(m, " vmalloc");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3403
3404
  
  	if (v->flags & VM_MAP)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3405
  		seq_puts(m, " vmap");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3406
3407
  
  	if (v->flags & VM_USERMAP)
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3408
  		seq_puts(m, " user");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3409

fe9041c24   Christoph Hellwig   vmalloc: lift the...
3410
3411
  	if (v->flags & VM_DMA_COHERENT)
  		seq_puts(m, " dma-coherent");
244d63ee3   David Rientjes   mm, vmalloc: remo...
3412
  	if (is_vmalloc_addr(v->pages))
f4527c908   Fabian Frederick   mm/vmalloc.c: rep...
3413
  		seq_puts(m, " vpages");
a10aa5798   Christoph Lameter   vmalloc: show vma...
3414

a47a126ad   Eric Dumazet   vmallocinfo: add ...
3415
  	show_numa_info(m, v);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3416
3417
  	seq_putc(m, '
  ');
dd3b8353b   Uladzislau Rezki (Sony)   mm/vmalloc: do no...
3418
3419
3420
3421
3422
3423
3424
3425
3426
  
  	/*
  	 * As a final step, dump "unpurged" areas. Note,
  	 * that entire "/proc/vmallocinfo" output will not
  	 * be address sorted, because the purge list is not
  	 * sorted.
  	 */
  	if (list_is_last(&va->list, &vmap_area_list))
  		show_purge_info(m);
a10aa5798   Christoph Lameter   vmalloc: show vma...
3427
3428
  	return 0;
  }
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
3429
  static const struct seq_operations vmalloc_op = {
a10aa5798   Christoph Lameter   vmalloc: show vma...
3430
3431
3432
3433
3434
  	.start = s_start,
  	.next = s_next,
  	.stop = s_stop,
  	.show = s_show,
  };
5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
3435

5f6a6a9c4   Alexey Dobriyan   proc: move /proc/...
3436
3437
  /*
   * Register the "vmallocinfo" proc entry (mode 0400) backed by
   * vmalloc_op.  With CONFIG_NUMA the entry is created with a private
   * buffer of nr_node_ids unsigned ints, which show_numa_info() uses as
   * per-node counters; without it no private data is requested.
   *
   * Always returns 0; the result of the proc_create_* call is not checked.
   */
  static int __init proc_vmalloc_init(void)
  {
  	if (!IS_ENABLED(CONFIG_NUMA)) {
  		proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
  		return 0;
  	}

  	proc_create_seq_private("vmallocinfo", 0400, NULL,
  			&vmalloc_op,
  			nr_node_ids * sizeof(unsigned int), NULL);
  	return 0;
  }
  module_init(proc_vmalloc_init);
db3808c1b   Joonsoo Kim   mm, vmalloc: move...
3447

a10aa5798   Christoph Lameter   vmalloc: show vma...
3448
  #endif