  // SPDX-License-Identifier: GPL-2.0-only
  /*
   *  Copyright (C) 1993  Linus Torvalds
   *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
   *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
   *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
   *  Numa awareness, Christoph Lameter, SGI, June 2005
   *  Improving global KVA allocator, Uladzislau Rezki, Sony, May 2019
   */
  #include <linux/vmalloc.h>
  #include <linux/mm.h>
  #include <linux/module.h>
  #include <linux/highmem.h>
  #include <linux/sched/signal.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  #include <linux/set_memory.h>
  #include <linux/debugobjects.h>
  #include <linux/kallsyms.h>
  #include <linux/list.h>
  #include <linux/notifier.h>
  #include <linux/rbtree.h>
  #include <linux/xarray.h>
  #include <linux/rcupdate.h>
  #include <linux/pfn.h>
  #include <linux/kmemleak.h>
  #include <linux/atomic.h>
  #include <linux/compiler.h>
  #include <linux/llist.h>
  #include <linux/bitops.h>
  #include <linux/rbtree_augmented.h>
  #include <linux/overflow.h>

  #include <linux/uaccess.h>
  #include <asm/tlbflush.h>
  #include <asm/shmparam.h>

  #include "internal.h"
  #include "pgalloc-track.h"

  bool is_vmalloc_addr(const void *x)
  {
  	unsigned long addr = (unsigned long)x;
  
  	return addr >= VMALLOC_START && addr < VMALLOC_END;
  }
  EXPORT_SYMBOL(is_vmalloc_addr);
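
/*
 * Example (illustrative sketch, not part of this file): a caller that
 * may hold either a kmalloc()'ed or a vmalloc()'ed buffer can use
 * is_vmalloc_addr() to pick the matching free routine. The helper
 * name free_any_buf() is hypothetical; this is essentially what
 * kvfree() does.
 *
 *	static void free_any_buf(const void *buf)
 *	{
 *		if (is_vmalloc_addr(buf))
 *			vfree(buf);
 *		else
 *			kfree(buf);
 *	}
 */
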
  struct vfree_deferred {
  	struct llist_head list;
  	struct work_struct wq;
  };
  static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
  
  static void __vunmap(const void *, int);
  
  static void free_work(struct work_struct *w)
  {
  	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
  	struct llist_node *t, *llnode;
  
  	llist_for_each_safe(llnode, t, llist_del_all(&p->list))
  		__vunmap((void *)llnode, 1);
  }
  /*** Page table manipulation functions ***/

  static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	pte_t *pte;
  
  	pte = pte_offset_kernel(pmd, addr);
  	do {
  		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
  		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
  	} while (pte++, addr += PAGE_SIZE, addr != end);
  	*mask |= PGTBL_PTE_MODIFIED;
  }
  static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	pmd_t *pmd;
  	unsigned long next;
  	int cleared;
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		next = pmd_addr_end(addr, end);
  
  		cleared = pmd_clear_huge(pmd);
  		if (cleared || pmd_bad(*pmd))
  			*mask |= PGTBL_PMD_MODIFIED;
  
  		if (cleared)
  			continue;
  		if (pmd_none_or_clear_bad(pmd))
  			continue;
  		vunmap_pte_range(pmd, addr, next, mask);
  
  		cond_resched();
  	} while (pmd++, addr = next, addr != end);
  }
  static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	pud_t *pud;
  	unsigned long next;
  	int cleared;

  	pud = pud_offset(p4d, addr);
  	do {
  		next = pud_addr_end(addr, end);
  
  		cleared = pud_clear_huge(pud);
  		if (cleared || pud_bad(*pud))
  			*mask |= PGTBL_PUD_MODIFIED;
  
  		if (cleared)
  			continue;
  		if (pud_none_or_clear_bad(pud))
  			continue;
  		vunmap_pmd_range(pud, addr, next, mask);
  	} while (pud++, addr = next, addr != end);
  }
  static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  			     pgtbl_mod_mask *mask)
  {
  	p4d_t *p4d;
  	unsigned long next;
  	int cleared;
  
  	p4d = p4d_offset(pgd, addr);
  	do {
  		next = p4d_addr_end(addr, end);
  
  		cleared = p4d_clear_huge(p4d);
  		if (cleared || p4d_bad(*p4d))
  			*mask |= PGTBL_P4D_MODIFIED;
  
  		if (cleared)
  			continue;
  		if (p4d_none_or_clear_bad(p4d))
  			continue;
  		vunmap_pud_range(p4d, addr, next, mask);
  	} while (p4d++, addr = next, addr != end);
  }
  /**
   * unmap_kernel_range_noflush - unmap kernel VM area
   * @start: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @start.  The VM area that @start and @size
 * specify should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is responsible
 * for calling flush_cache_vunmap() on to-be-unmapped areas before calling
 * this function and flush_tlb_kernel_range() after.
 */
  void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
  {
  	unsigned long end = start + size;
  	unsigned long next;
  	pgd_t *pgd;
  	unsigned long addr = start;
  	pgtbl_mod_mask mask = 0;
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
  	do {
  		next = pgd_addr_end(addr, end);
  		if (pgd_bad(*pgd))
  			mask |= PGTBL_PGD_MODIFIED;
  		if (pgd_none_or_clear_bad(pgd))
  			continue;
  		vunmap_p4d_range(pgd, addr, next, &mask);
  	} while (pgd++, addr = next, addr != end);
  
  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
  }
  
  static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	pte_t *pte;
  	/*
  	 * nr is a running index into the array which helps higher level
  	 * callers keep track of where we're up to.
  	 */
  	pte = pte_alloc_kernel_track(pmd, addr, mask);
  	if (!pte)
  		return -ENOMEM;
  	do {
  		struct page *page = pages[*nr];
  
  		if (WARN_ON(!pte_none(*pte)))
  			return -EBUSY;
  		if (WARN_ON(!page))
  			return -ENOMEM;
  		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
  		(*nr)++;
  	} while (pte++, addr += PAGE_SIZE, addr != end);
  	*mask |= PGTBL_PTE_MODIFIED;
  	return 0;
  }
  static int vmap_pmd_range(pud_t *pud, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	pmd_t *pmd;
  	unsigned long next;
  	pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
  	if (!pmd)
  		return -ENOMEM;
  	do {
  		next = pmd_addr_end(addr, end);
  		if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
  			return -ENOMEM;
  	} while (pmd++, addr = next, addr != end);
  	return 0;
  }
  static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	pud_t *pud;
  	unsigned long next;
  	pud = pud_alloc_track(&init_mm, p4d, addr, mask);
  	if (!pud)
  		return -ENOMEM;
  	do {
  		next = pud_addr_end(addr, end);
  		if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
  			return -ENOMEM;
  	} while (pud++, addr = next, addr != end);
  	return 0;
  }
  static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
  		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
  		pgtbl_mod_mask *mask)
  {
  	p4d_t *p4d;
  	unsigned long next;
  	p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
  	if (!p4d)
  		return -ENOMEM;
  	do {
  		next = p4d_addr_end(addr, end);
  		if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
  			return -ENOMEM;
  	} while (p4d++, addr = next, addr != end);
  	return 0;
  }
  /**
   * map_kernel_range_noflush - map kernel VM area with the specified pages
   * @addr: start of the VM area to map
   * @size: size of the VM area to map
   * @prot: page protection flags to use
   * @pages: pages to map
   *
 * Map PFN_UP(@size) pages at @addr.  The VM area that @addr and @size
 * specify should have been allocated using get_vm_area() and its friends.
   *
   * NOTE:
   * This function does NOT do any cache flushing.  The caller is responsible for
   * calling flush_cache_vmap() on to-be-mapped areas before calling this
   * function.
   *
   * RETURNS:
   * 0 on success, -errno on failure.
   */
  int map_kernel_range_noflush(unsigned long addr, unsigned long size,
  			     pgprot_t prot, struct page **pages)
  {
  	unsigned long start = addr;
  	unsigned long end = addr + size;
  	unsigned long next;
  	pgd_t *pgd;
  	int err = 0;
  	int nr = 0;
  	pgtbl_mod_mask mask = 0;
  
  	BUG_ON(addr >= end);
  	pgd = pgd_offset_k(addr);
  	do {
  		next = pgd_addr_end(addr, end);
  		if (pgd_bad(*pgd))
  			mask |= PGTBL_PGD_MODIFIED;
  		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
  		if (err)
  			return err;
  	} while (pgd++, addr = next, addr != end);

  	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
  		arch_sync_kernel_mappings(start, end);
  	return 0;
  }
  int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
  		struct page **pages)
  {
  	int ret;
  	ret = map_kernel_range_noflush(start, size, prot, pages);
  	flush_cache_vmap(start, start + size);
  	return ret;
  }
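
/*
 * Example (illustrative sketch, not part of this file): the typical
 * pairing of the helpers above, mapping an existing page array into a
 * fresh VM area, in the spirit of what vmap() does. Error handling is
 * minimal, and "pages"/"nr_pages" are assumed to be provided by the
 * caller.
 *
 *	struct vm_struct *area;
 *
 *	area = get_vm_area(nr_pages << PAGE_SHIFT, VM_MAP);
 *	if (!area)
 *		return NULL;
 *
 *	if (map_kernel_range((unsigned long)area->addr,
 *			get_vm_area_size(area), PAGE_KERNEL, pages) < 0) {
 *		free_vm_area(area);
 *		return NULL;
 *	}
 *	return area->addr;
 */
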
  int is_vmalloc_or_module_addr(const void *x)
  {
  	/*
  	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put modules in the vmalloc space.
  	 */
  #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
  	unsigned long addr = (unsigned long)x;
  	if (addr >= MODULES_VADDR && addr < MODULES_END)
  		return 1;
  #endif
  	return is_vmalloc_addr(x);
  }
  /*
   * Walk a vmap address to the struct page it maps.
   */
  struct page *vmalloc_to_page(const void *vmalloc_addr)
  {
  	unsigned long addr = (unsigned long) vmalloc_addr;
  	struct page *page = NULL;
  	pgd_t *pgd = pgd_offset_k(addr);
  	p4d_t *p4d;
  	pud_t *pud;
  	pmd_t *pmd;
  	pte_t *ptep, pte;

  	/*
  	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
  	 * architectures that do not vmalloc module space
  	 */
  	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

  	if (pgd_none(*pgd))
  		return NULL;
  	p4d = p4d_offset(pgd, addr);
  	if (p4d_none(*p4d))
  		return NULL;
  	pud = pud_offset(p4d, addr);
  
  	/*
  	 * Don't dereference bad PUD or PMD (below) entries. This will also
  	 * identify huge mappings, which we may encounter on architectures
  	 * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
  	 * identified as vmalloc addresses by is_vmalloc_addr(), but are
  	 * not [unambiguously] associated with a struct page, so there is
  	 * no correct value to return for them.
  	 */
  	WARN_ON_ONCE(pud_bad(*pud));
  	if (pud_none(*pud) || pud_bad(*pud))
  		return NULL;
  	pmd = pmd_offset(pud, addr);
  	WARN_ON_ONCE(pmd_bad(*pmd));
  	if (pmd_none(*pmd) || pmd_bad(*pmd))
  		return NULL;
  
  	ptep = pte_offset_map(pmd, addr);
  	pte = *ptep;
  	if (pte_present(pte))
  		page = pte_page(pte);
  	pte_unmap(ptep);
  	return page;
  }
  EXPORT_SYMBOL(vmalloc_to_page);
  
  /*
   * Map a vmalloc()-space virtual address to the physical page frame number.
   */
  unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
  {
  	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
  }
  EXPORT_SYMBOL(vmalloc_to_pfn);
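
/*
 * Example (illustrative sketch, not part of this file): walking a
 * vmalloc()'ed buffer page by page, e.g. to build a scatterlist.
 * "buf", "size" and consume_page() are hypothetical.
 *
 *	unsigned long offset;
 *
 *	for (offset = 0; offset < size; offset += PAGE_SIZE) {
 *		struct page *page = vmalloc_to_page(buf + offset);
 *
 *		if (WARN_ON(!page))
 *			break;
 *		consume_page(page, page_to_pfn(page));
 *	}
 */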

  
  /*** Global kva allocator ***/
  #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
  #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0


  static DEFINE_SPINLOCK(vmap_area_lock);
  static DEFINE_SPINLOCK(free_vmap_area_lock);
  /* Export for kexec only */
  LIST_HEAD(vmap_area_list);
  static LLIST_HEAD(vmap_purge_list);
  static struct rb_root vmap_area_root = RB_ROOT;
  static bool vmap_initialized __read_mostly;

  /*
 * This kmem_cache is used for vmap_area objects. Instead of
 * allocating from slab we reuse an object from this cache to
 * make things faster, especially in the "no edge" splitting of
 * a free block.
   */
  static struct kmem_cache *vmap_area_cachep;
  
  /*
 * This linked list is used together with free_vmap_area_root.
   * It gives O(1) access to prev/next to perform fast coalescing.
   */
  static LIST_HEAD(free_vmap_area_list);
  
  /*
 * This augmented red-black tree represents the free vmap space.
 * All vmap_area objects in this tree are sorted by va->va_start
 * address. It is used for allocation and for merging when a vmap
 * object is released.
 *
 * Each vmap_area node stores the maximum available free-block
 * size of its sub-tree, right or left. Therefore it is possible
 * to find the lowest match for a given free-area request.
   */
  static struct rb_root free_vmap_area_root = RB_ROOT;
  /*
 * Preload a CPU with one object for the "no edge" split case. The
 * aim is to get rid of allocations from atomic context, and thus
 * to use more permissive allocation masks.
   */
  static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
  static __always_inline unsigned long
  va_size(struct vmap_area *va)
  {
  	return (va->va_end - va->va_start);
  }
  
  static __always_inline unsigned long
  get_subtree_max_size(struct rb_node *node)
  {
  	struct vmap_area *va;
  
  	va = rb_entry_safe(node, struct vmap_area, rb_node);
  	return va ? va->subtree_max_size : 0;
  }

  /*
 * Called when the node is removed and the tree is rotated.
   */
  static __always_inline unsigned long
  compute_subtree_max_size(struct vmap_area *va)
  {
  	return max3(va_size(va),
  		get_subtree_max_size(va->rb_node.rb_left),
  		get_subtree_max_size(va->rb_node.rb_right));
  }
  RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
  	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
  
  static void purge_vmap_area_lazy(void);
  static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
  static unsigned long lazy_max_pages(void);

  static atomic_long_t nr_vmalloc_pages;
  
  unsigned long vmalloc_nr_pages(void)
  {
  	return atomic_long_read(&nr_vmalloc_pages);
  }
  EXPORT_SYMBOL_GPL(vmalloc_nr_pages);

  static struct vmap_area *__find_vmap_area(unsigned long addr)
  {
  	struct rb_node *n = vmap_area_root.rb_node;
  
  	while (n) {
  		struct vmap_area *va;
  
  		va = rb_entry(n, struct vmap_area, rb_node);
  		if (addr < va->va_start)
  			n = n->rb_left;
  		else if (addr >= va->va_end)
  			n = n->rb_right;
  		else
  			return va;
  	}
  
  	return NULL;
  }
  /*
 * This function returns the address of the parent node
 * and of its left or right link for further processing.
   *
 * Otherwise NULL is returned. In that case all further
 * steps regarding insertion of the conflicting, overlapping
 * range have to be declined, and this is considered a bug.
   */
  static __always_inline struct rb_node **
  find_va_links(struct vmap_area *va,
  	struct rb_root *root, struct rb_node *from,
  	struct rb_node **parent)
  {
  	struct vmap_area *tmp_va;
  	struct rb_node **link;
  
  	if (root) {
  		link = &root->rb_node;
  		if (unlikely(!*link)) {
  			*parent = NULL;
  			return link;
  		}
  	} else {
  		link = &from;
  	}

  	/*
	 * Go to the bottom of the tree. When we hit the last point
	 * we end up with the parent rb_node and the correct direction,
	 * named "link", where the new va->rb_node will be attached.
  	 */
  	do {
  		tmp_va = rb_entry(*link, struct vmap_area, rb_node);

  		/*
		 * During the traversal we also do some sanity checks.
		 * Trigger a WARN() and bail out if partial (left/right)
		 * or full overlaps are detected.
  		 */
  		if (va->va_start < tmp_va->va_end &&
  				va->va_end <= tmp_va->va_start)
  			link = &(*link)->rb_left;
  		else if (va->va_end > tmp_va->va_start &&
  				va->va_start >= tmp_va->va_end)
  			link = &(*link)->rb_right;
  		else {
			WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
				va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
  
  			return NULL;
  		}
  	} while (*link);
  
  	*parent = &tmp_va->rb_node;
  	return link;
  }
  
  static __always_inline struct list_head *
  get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
  {
  	struct list_head *list;
  
  	if (unlikely(!parent))
  		/*
  		 * The red-black tree where we try to find VA neighbors
		 * before merging or inserting is empty, i.e. there
		 * is no free vmap space. Normally this does not
		 * happen, but we handle the case anyway.
  		 */
  		return NULL;
  
  	list = &rb_entry(parent, struct vmap_area, rb_node)->list;
  	return (&parent->rb_right == link ? list->next : list);
  }
  
  static __always_inline void
  link_va(struct vmap_area *va, struct rb_root *root,
  	struct rb_node *parent, struct rb_node **link, struct list_head *head)
  {
  	/*
  	 * VA is still not in the list, but we can
  	 * identify its future previous list_head node.
  	 */
  	if (likely(parent)) {
  		head = &rb_entry(parent, struct vmap_area, rb_node)->list;
  		if (&parent->rb_right != link)
  			head = head->prev;
  	}
  	/* Insert to the rb-tree */
  	rb_link_node(&va->rb_node, parent, link);
  	if (root == &free_vmap_area_root) {
		/*
		 * Just perform a simple insertion into the tree. We do not
		 * set va->subtree_max_size to its current size before
		 * calling rb_insert_augmented(), because the tree is
		 * populated from the bottom up to the parent levels only
		 * when the node _is_ in the tree.
		 *
		 * Therefore we set subtree_max_size to zero after insertion,
		 * to let __augment_tree_propagate_from() put everything
		 * into the correct order later on.
		 */
  		rb_insert_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  		va->subtree_max_size = 0;
  	} else {
  		rb_insert_color(&va->rb_node, root);
  	}

  	/* Address-sort this list */
  	list_add(&va->list, head);
  }
  static __always_inline void
  unlink_va(struct vmap_area *va, struct rb_root *root)
  {
  	if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
  		return;

  	if (root == &free_vmap_area_root)
  		rb_erase_augmented(&va->rb_node,
  			root, &free_vmap_area_rb_augment_cb);
  	else
  		rb_erase(&va->rb_node, root);
  
  	list_del(&va->list);
  	RB_CLEAR_NODE(&va->rb_node);
  }
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
  static void
  augment_tree_propagate_check(void)
  {
  	struct vmap_area *va;
  	unsigned long computed_size;

  	list_for_each_entry(va, &free_vmap_area_list, list) {
  		computed_size = compute_subtree_max_size(va);
  		if (computed_size != va->subtree_max_size)
  			pr_emerg("tree is corrupted: %lu, %lu
  ",
  				va_size(va), va->subtree_max_size);
  	}
  }
  #endif
  /*
 * This function populates subtree_max_size from the bottom up to
 * upper levels, starting from the VA point. The propagation must
 * be done when the size of a VA is modified by changing its
 * va_start/va_end, or when a new VA is inserted into the tree.
   *
   * It means that __augment_tree_propagate_from() must be called:
   * - After VA has been inserted to the tree(free path);
   * - After VA has been shrunk(allocation path);
   * - After VA has been increased(merging path).
   *
 * Please note that this does not mean that the upper parent nodes
 * and their subtree_max_size are recalculated all the way up to
 * the root node.
   *
   *       4--8
   *        /\
   *       /  \
   *      /    \
   *    2--2  8--8
   *
 * For example, if we modify node 4, shrinking it to 2, then no
 * modification is required. If we shrink node 2 to 1, only its
 * subtree_max_size is updated and set to 1. If we shrink node 8
 * to 6, then its subtree_max_size is set to 6 and the parent
 * node becomes 4--6.
   */
  static __always_inline void
  augment_tree_propagate_from(struct vmap_area *va)
  {
  	/*
  	 * Populate the tree from bottom towards the root until
  	 * the calculated maximum available size of checked node
  	 * is equal to its current one.
  	 */
  	free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
  
  #if DEBUG_AUGMENT_PROPAGATE_CHECK
  	augment_tree_propagate_check();
  #endif
  }
  
  static void
  insert_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct rb_node **link;
  	struct rb_node *parent;
  
  	link = find_va_links(va, root, NULL, &parent);
  	if (link)
  		link_va(va, root, parent, link, head);
  }
  
  static void
  insert_vmap_area_augment(struct vmap_area *va,
  	struct rb_node *from, struct rb_root *root,
  	struct list_head *head)
  {
  	struct rb_node **link;
  	struct rb_node *parent;
  
  	if (from)
  		link = find_va_links(va, NULL, from, &parent);
  	else
  		link = find_va_links(va, root, NULL, &parent);
  	if (link) {
  		link_va(va, root, parent, link, head);
  		augment_tree_propagate_from(va);
  	}
  }
  
  /*
 * Merge a de-allocated chunk of VA memory with the previous
 * and next free blocks. If no coalescing is done, a new
 * free area is inserted. If the VA has been merged, it is
 * freed.
   *
 * Please note, it can return NULL in case of overlapping
 * ranges, accompanied by a WARN() report. Although this is
 * buggy behaviour, the system can stay alive and keep
 * going.
   */
  static __always_inline struct vmap_area *
  merge_or_add_vmap_area(struct vmap_area *va,
  	struct rb_root *root, struct list_head *head)
  {
  	struct vmap_area *sibling;
  	struct list_head *next;
  	struct rb_node **link;
  	struct rb_node *parent;
  	bool merged = false;
  
  	/*
  	 * Find a place in the tree where VA potentially will be
  	 * inserted, unless it is merged with its sibling/siblings.
  	 */
  	link = find_va_links(va, root, NULL, &parent);
  	if (!link)
  		return NULL;
  
  	/*
  	 * Get next node of VA to check if merging can be done.
  	 */
  	next = get_va_next_sibling(parent, link);
  	if (unlikely(next == NULL))
  		goto insert;
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<------VA------>|<-----Next----->|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next != head) {
  		sibling = list_entry(next, struct vmap_area, list);
  		if (sibling->va_start == va->va_end) {
  			sibling->va_start = va->va_start;
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
  		}
  	}
  
  	/*
  	 * start            end
  	 * |                |
  	 * |<-----Prev----->|<------VA------>|
  	 *                  |                |
  	 *                  start            end
  	 */
  	if (next->prev != head) {
  		sibling = list_entry(next->prev, struct vmap_area, list);
  		if (sibling->va_end == va->va_start) {
  			/*
  			 * If both neighbors are coalesced, it is important
  			 * to unlink the "next" node first, followed by merging
  			 * with "previous" one. Otherwise the tree might not be
  			 * fully populated if a sibling's augmented value is
  			 * "normalized" because of rotation operations.
  			 */
  			if (merged)
  				unlink_va(va, root);

  			sibling->va_end = va->va_end;
  			/* Free vmap_area object. */
  			kmem_cache_free(vmap_area_cachep, va);
  
  			/* Point to the new merged area. */
  			va = sibling;
  			merged = true;
  		}
  	}
  
  insert:
  	if (!merged)
  		link_va(va, root, parent, link, head);

  	/*
  	 * Last step is to check and update the tree.
  	 */
  	augment_tree_propagate_from(va);
  	return va;
  }
  
  static __always_inline bool
  is_within_this_va(struct vmap_area *va, unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	unsigned long nva_start_addr;
  
  	if (va->va_start > vstart)
  		nva_start_addr = ALIGN(va->va_start, align);
  	else
  		nva_start_addr = ALIGN(vstart, align);
  
  	/* Can be overflowed due to big size or alignment. */
  	if (nva_start_addr + size < nva_start_addr ||
  			nva_start_addr < vstart)
  		return false;
  
  	return (nva_start_addr + size <= va->va_end);
  }
  
  /*
 * Find the first free block (with the lowest start address) in
 * the tree that can accomplish the request given by the passed
 * parameters.
   */
  static __always_inline struct vmap_area *
  find_vmap_lowest_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;
  	struct rb_node *node;
  	unsigned long length;
  
  	/* Start from the root. */
  	node = free_vmap_area_root.rb_node;
  
  	/* Adjust the search size for alignment overhead. */
  	length = size + align - 1;
  
  	while (node) {
  		va = rb_entry(node, struct vmap_area, rb_node);
  
  		if (get_subtree_max_size(node->rb_left) >= length &&
  				vstart < va->va_start) {
  			node = node->rb_left;
  		} else {
  			if (is_within_this_va(va, size, align, vstart))
  				return va;
  
  			/*
			 * It does not make sense to go deeper into the right
			 * sub-tree if it does not have a free block that is
			 * equal to or bigger than the requested search length.
  			 */
  			if (get_subtree_max_size(node->rb_right) >= length) {
  				node = node->rb_right;
  				continue;
  			}
  
  			/*
			 * OK. We roll back and find the first right sub-tree
  			 * that will satisfy the search criteria. It can happen
  			 * only once due to "vstart" restriction.
  			 */
  			while ((node = rb_parent(node))) {
  				va = rb_entry(node, struct vmap_area, rb_node);
  				if (is_within_this_va(va, size, align, vstart))
  					return va;
  
  				if (get_subtree_max_size(node->rb_right) >= length &&
  						vstart <= va->va_start) {
  					node = node->rb_right;
  					break;
  				}
  			}
  		}
  	}
  
  	return NULL;
  }
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  #include <linux/random.h>
  
  static struct vmap_area *
  find_vmap_lowest_linear_match(unsigned long size,
  	unsigned long align, unsigned long vstart)
  {
  	struct vmap_area *va;
  
  	list_for_each_entry(va, &free_vmap_area_list, list) {
  		if (!is_within_this_va(va, size, align, vstart))
  			continue;
  
  		return va;
  	}
  
  	return NULL;
  }
  
  static void
  find_vmap_lowest_match_check(unsigned long size)
  {
  	struct vmap_area *va_1, *va_2;
  	unsigned long vstart;
  	unsigned int rnd;
  
  	get_random_bytes(&rnd, sizeof(rnd));
  	vstart = VMALLOC_START + rnd;
  
  	va_1 = find_vmap_lowest_match(size, 1, vstart);
  	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
  
  	if (va_1 != va_2)
  		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx
  ",
  			va_1, va_2, vstart);
  }
  #endif
  enum fit_type {
  	NOTHING_FIT = 0,
  	FL_FIT_TYPE = 1,	/* full fit */
  	LE_FIT_TYPE = 2,	/* left edge fit */
  	RE_FIT_TYPE = 3,	/* right edge fit */
  	NE_FIT_TYPE = 4		/* no edge fit */
  };
  
  static __always_inline enum fit_type
  classify_va_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size)
  {
  	enum fit_type type;
  
  	/* Check if it is within VA. */
  	if (nva_start_addr < va->va_start ||
  			nva_start_addr + size > va->va_end)
  		return NOTHING_FIT;
  
  	/* Now classify. */
  	if (va->va_start == nva_start_addr) {
  		if (va->va_end == nva_start_addr + size)
  			type = FL_FIT_TYPE;
  		else
  			type = LE_FIT_TYPE;
  	} else if (va->va_end == nva_start_addr + size) {
  		type = RE_FIT_TYPE;
  	} else {
  		type = NE_FIT_TYPE;
  	}
  
  	return type;
  }
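
/*
 * Worked example (illustrative): given a free VA spanning
 * [0x1000:0x9000), classify_va_fit_type() returns:
 *
 *	FL_FIT_TYPE	for nva_start_addr == 0x1000, size == 0x8000
 *	LE_FIT_TYPE	for nva_start_addr == 0x1000, size == 0x2000
 *	RE_FIT_TYPE	for nva_start_addr == 0x7000, size == 0x2000
 *	NE_FIT_TYPE	for nva_start_addr == 0x3000, size == 0x2000
 *	NOTHING_FIT	if [nva_start_addr:nva_start_addr + size] does
 *			not fit within [va_start:va_end]
 */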
  
  static __always_inline int
  adjust_va_to_fit_type(struct vmap_area *va,
  	unsigned long nva_start_addr, unsigned long size,
  	enum fit_type type)
  {
  	struct vmap_area *lva = NULL;
  
  	if (type == FL_FIT_TYPE) {
  		/*
  		 * No need to split VA, it fully fits.
  		 *
  		 * |               |
  		 * V      NVA      V
  		 * |---------------|
  		 */
  		unlink_va(va, &free_vmap_area_root);
  		kmem_cache_free(vmap_area_cachep, va);
  	} else if (type == LE_FIT_TYPE) {
  		/*
  		 * Split left edge of fit VA.
  		 *
  		 * |       |
  		 * V  NVA  V   R
  		 * |-------|-------|
  		 */
  		va->va_start += size;
  	} else if (type == RE_FIT_TYPE) {
  		/*
  		 * Split right edge of fit VA.
  		 *
  		 *         |       |
  		 *     L   V  NVA  V
  		 * |-------|-------|
  		 */
  		va->va_end = nva_start_addr;
  	} else if (type == NE_FIT_TYPE) {
  		/*
  		 * Split no edge of fit VA.
  		 *
  		 *     |       |
  		 *   L V  NVA  V R
  		 * |---|-------|---|
  		 */
  		lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
  		if (unlikely(!lva)) {
  			/*
			 * For the percpu allocator we do not do any
			 * pre-allocation and leave it as it is. The reason is
			 * that it most likely never ends up with NE_FIT_TYPE
			 * splitting. In case of percpu allocations, offsets
			 * and sizes are aligned to a fixed align request,
			 * i.e. RE_FIT_TYPE and FL_FIT_TYPE are its main
			 * fitting cases.
			 *
			 * There are a few exceptions though; for example, the
			 * first allocation (early boot-up), when we have "one"
			 * big free space that has to be split.
  			 *
			 * We can also hit this path in case of regular "vmap"
			 * allocations, if "this" current CPU was not preloaded.
			 * See the comment in alloc_vmap_area() for why. If so,
			 * GFP_NOWAIT is used instead to get an extra object
			 * for split purposes. That is rare and most of the
			 * time does not occur.
			 *
			 * What happens if an allocation fails? Basically, an
			 * "overflow" path is triggered to purge lazily freed
			 * areas in order to free some memory, then the "retry"
			 * path is taken to repeat the attempt one more time.
			 * See more details in the alloc_vmap_area() function.
  			 */
  			lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!lva)
  				return -1;
  		}
  
  		/*
  		 * Build the remainder.
  		 */
  		lva->va_start = va->va_start;
  		lva->va_end = nva_start_addr;
  
  		/*
  		 * Shrink this VA to remaining size.
  		 */
  		va->va_start = nva_start_addr + size;
  	} else {
  		return -1;
  	}
  
  	if (type != FL_FIT_TYPE) {
  		augment_tree_propagate_from(va);
  		if (lva)	/* type == NE_FIT_TYPE */
  			insert_vmap_area_augment(lva, &va->rb_node,
  				&free_vmap_area_root, &free_vmap_area_list);
  	}
  
  	return 0;
  }
  
  /*
 * Returns the start address of the newly allocated area on success.
 * Otherwise, "vend" is returned to indicate failure.
   */
  static __always_inline unsigned long
  __alloc_vmap_area(unsigned long size, unsigned long align,
  	unsigned long vstart, unsigned long vend)
  {
  	unsigned long nva_start_addr;
  	struct vmap_area *va;
  	enum fit_type type;
  	int ret;
  
  	va = find_vmap_lowest_match(size, align, vstart);
  	if (unlikely(!va))
  		return vend;
  
  	if (va->va_start > vstart)
  		nva_start_addr = ALIGN(va->va_start, align);
  	else
  		nva_start_addr = ALIGN(vstart, align);
  
  	/* Check the "vend" restriction. */
  	if (nva_start_addr + size > vend)
  		return vend;
  
  	/* Classify what we have found. */
  	type = classify_va_fit_type(va, nva_start_addr, size);
  	if (WARN_ON_ONCE(type == NOTHING_FIT))
  		return vend;
  
  	/* Update the free vmap_area. */
  	ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
  	if (ret)
  		return vend;
  #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
  	find_vmap_lowest_match_check(size);
  #endif
  	return nva_start_addr;
  }

  /*
   * Free a region of KVA allocated by alloc_vmap_area
   */
  static void free_vmap_area(struct vmap_area *va)
  {
  	/*
  	 * Remove from the busy tree/list.
  	 */
  	spin_lock(&vmap_area_lock);
  	unlink_va(va, &vmap_area_root);
  	spin_unlock(&vmap_area_lock);
  
  	/*
  	 * Insert/Merge it back to the free tree/list.
  	 */
  	spin_lock(&free_vmap_area_lock);
  	merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
  	spin_unlock(&free_vmap_area_lock);
  }
  
  /*
 * Allocate a region of KVA of the specified size and alignment,
 * between vstart and vend.
   */
  static struct vmap_area *alloc_vmap_area(unsigned long size,
  				unsigned long align,
  				unsigned long vstart, unsigned long vend,
  				int node, gfp_t gfp_mask)
  {
  	struct vmap_area *va, *pva;
  	unsigned long addr;
  	int purged = 0;
  	int ret;

  	BUG_ON(!size);
  	BUG_ON(offset_in_page(size));
  	BUG_ON(!is_power_of_2(align));

  	if (unlikely(!vmap_initialized))
  		return ERR_PTR(-EBUSY);
  	might_sleep();
  	gfp_mask = gfp_mask & GFP_RECLAIM_MASK;

  	va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
  	if (unlikely(!va))
  		return ERR_PTR(-ENOMEM);
  	/*
  	 * Only scan the relevant parts containing pointers to other objects
  	 * to avoid false negatives.
  	 */
  	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);

  retry:
  	/*
	 * Preload this CPU with one extra vmap_area object. It is used
	 * when the fit type of the free area is NE_FIT_TYPE. Please note,
	 * this does not guarantee that the allocation occurs on a CPU
	 * that is preloaded; instead we minimize the cases when it is
	 * not. That can happen because of cpu migration, since there is
	 * a race until the below spinlock is taken.
  	 *
  	 * The preload is done in non-atomic context, thus it allows us
  	 * to use more permissive allocation masks to be more stable under
	 * low memory conditions and high memory pressure. In the rare
	 * case when the CPU is not preloaded, GFP_NOWAIT is used.
  	 *
  	 * Set "pva" to NULL here, because of "retry" path.
  	 */
  	pva = NULL;

  	if (!this_cpu_read(ne_fit_preload_node))
  		/*
  		 * Even if it fails we do not really care about that.
  		 * Just proceed as it is. If needed "overflow" path
  		 * will refill the cache we allocate from.
  		 */
  		pva = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);

  	spin_lock(&free_vmap_area_lock);
  
  	if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
  		kmem_cache_free(vmap_area_cachep, pva);

  	/*
  	 * If an allocation fails, the "vend" address is
  	 * returned. Therefore trigger the overflow path.
  	 */
  	addr = __alloc_vmap_area(size, align, vstart, vend);
  	spin_unlock(&free_vmap_area_lock);
  	if (unlikely(addr == vend))
  		goto overflow;
  
  	va->va_start = addr;
  	va->va_end = addr + size;
  	va->vm = NULL;

  	spin_lock(&vmap_area_lock);
  	insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
  	spin_unlock(&vmap_area_lock);
  	BUG_ON(!IS_ALIGNED(va->va_start, align));
  	BUG_ON(va->va_start < vstart);
  	BUG_ON(va->va_end > vend);
  	ret = kasan_populate_vmalloc(addr, size);
  	if (ret) {
  		free_vmap_area(va);
  		return ERR_PTR(ret);
  	}
  	return va;
  
  overflow:
  	if (!purged) {
  		purge_vmap_area_lazy();
  		purged = 1;
  		goto retry;
  	}
  
  	if (gfpflags_allow_blocking(gfp_mask)) {
  		unsigned long freed = 0;
  		blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
  		if (freed > 0) {
  			purged = 0;
  			goto retry;
  		}
  	}
  	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
  		pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
  			size);
  
  	kmem_cache_free(vmap_area_cachep, va);
  	return ERR_PTR(-EBUSY);
  }
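  
  /*
   * Illustrative sketch, not part of this file: the preload above follows
   * a publish-or-free pattern. A hypothetical per-CPU slot "my_node",
   * filled the same way, would look like:
   *
   *	obj = kmem_cache_alloc_node(cachep, GFP_KERNEL, node);
   *	if (obj && this_cpu_cmpxchg(my_node, NULL, obj))
   *		kmem_cache_free(cachep, obj);
   *
   * At most one context installs its object into the slot; a loser of the
   * race frees its own copy, so nothing leaks under concurrent preloads.
   */
  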
  int register_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_register(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);
  
  int unregister_vmap_purge_notifier(struct notifier_block *nb)
  {
  	return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
  }
  EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
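  
  /*
   * Illustrative sketch, not part of this file: a subsystem caching
   * vmalloc'ed buffers could hook the purge path and release them when
   * KVA runs short. The names "my_purge", "my_nb" and
   * my_drop_cached_buffers() are hypothetical:
   *
   *	static int my_purge(struct notifier_block *nb,
   *			    unsigned long event, void *ptr)
   *	{
   *		unsigned long *freed = ptr;
   *
   *		*freed += my_drop_cached_buffers();
   *		return NOTIFY_OK;
   *	}
   *
   *	static struct notifier_block my_nb = { .notifier_call = my_purge };
   *	register_vmap_purge_notifier(&my_nb);
   *
   * Reporting a non-zero *freed makes the failed alloc_vmap_area() above
   * retry instead of returning -EBUSY.
   */
  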
  /*
   * lazy_max_pages is the maximum amount of virtual address space we gather up
   * before attempting to purge with a TLB flush.
   *
   * There is a tradeoff here: a larger number will cover more kernel page tables
   * and take slightly longer to purge, but it will linearly reduce the number of
   * global TLB flushes that must be performed. It would seem natural to scale
   * this number up linearly with the number of CPUs (because vmapping activity
   * could also scale linearly with the number of CPUs), however it is likely
   * that in practice, workloads might be constrained in other ways that mean
   * vmap activity will not scale linearly with CPUs. Also, I want to be
   * conservative and not introduce a big latency on huge systems, so go with
   * a less aggressive log scale. It will still be an improvement over the old
   * code, and it will be simple to change the scale factor if we find that it
   * becomes a problem on bigger systems.
   */
  static unsigned long lazy_max_pages(void)
  {
  	unsigned int log;
  
  	log = fls(num_online_cpus());
  
  	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
  }
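  
  /*
   * Worked example (assuming 4K pages): with 16 CPUs online, fls(16) == 5,
   * so lazy_max_pages() == 5 * (32MB / 4KB) == 40960 pages, i.e. 160MB of
   * lazily-freed KVA may accumulate before a purge is attempted. Doubling
   * the CPU count to 32 raises this only to 192MB, per the log scale above.
   */
  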
  static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);

  /*
   * Serialize vmap purging.  There is no actual critical section protected
   * by this lock, but we want to avoid concurrent calls for performance
   * reasons and to make the pcpu_get_vm_areas more deterministic.
   */
  static DEFINE_MUTEX(vmap_purge_lock);

  /* for per-CPU blocks */
  static void purge_fragmented_blocks_allcpus(void);
  /*
   * Called before a call to iounmap() if the caller wants the
   * vm_area_struct freed immediately.
   */
  void set_iounmap_nonlazy(void)
  {
  	atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
  }
  
  /*
   * Purges all lazily-freed vmap areas.
   */
  static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
  {
  	unsigned long resched_threshold;
  	struct llist_node *valist;
  	struct vmap_area *va;
  	struct vmap_area *n_va;

  	lockdep_assert_held(&vmap_purge_lock);

  	valist = llist_del_all(&vmap_purge_list);
  	if (unlikely(valist == NULL))
  		return false;
  
  	/*
  	 * TODO: calculate the flush range without looping.
  	 * The list can be up to lazy_max_pages() elements.
  	 */
  	llist_for_each_entry(va, valist, purge_list) {
  		if (va->va_start < start)
  			start = va->va_start;
  		if (va->va_end > end)
  			end = va->va_end;
  	}

  	flush_tlb_kernel_range(start, end);
  	resched_threshold = lazy_max_pages() << 1;

  	spin_lock(&free_vmap_area_lock);
  	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
  		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
  		unsigned long orig_start = va->va_start;
  		unsigned long orig_end = va->va_end;

  		/*
  		 * Finally insert or merge lazily-freed area. It is
  		 * detached and there is no need to "unlink" it from
  		 * anything.
  		 */
  		va = merge_or_add_vmap_area(va, &free_vmap_area_root,
  					    &free_vmap_area_list);
  		if (!va)
  			continue;
  		if (is_vmalloc_or_module_addr((void *)orig_start))
  			kasan_release_vmalloc(orig_start, orig_end,
  					      va->va_start, va->va_end);

  		atomic_long_sub(nr, &vmap_lazy_nr);

  		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
  			cond_resched_lock(&free_vmap_area_lock);
  	}
  	spin_unlock(&free_vmap_area_lock);
  	return true;
  }
  
  /*
   * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
   * is already purging.
   */
  static void try_purge_vmap_area_lazy(void)
  {
  	if (mutex_trylock(&vmap_purge_lock)) {
  		__purge_vmap_area_lazy(ULONG_MAX, 0);
  		mutex_unlock(&vmap_purge_lock);
  	}
  }
  
  /*
   * Kick off a purge of the outstanding lazy areas.
   */
  static void purge_vmap_area_lazy(void)
  {
  	mutex_lock(&vmap_purge_lock);
  	purge_fragmented_blocks_allcpus();
  	__purge_vmap_area_lazy(ULONG_MAX, 0);
  	mutex_unlock(&vmap_purge_lock);
  }
  
  /*
   * Free a vmap area, the caller ensuring that the area has been unmapped
   * and that flush_cache_vunmap() has been called for the correct range
   * previously.
   */
  static void free_vmap_area_noflush(struct vmap_area *va)
  {
  	unsigned long nr_lazy;

  	spin_lock(&vmap_area_lock);
  	unlink_va(va, &vmap_area_root);
  	spin_unlock(&vmap_area_lock);
  	nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
  				PAGE_SHIFT, &vmap_lazy_nr);
  
  	/* After this point, we may free va at any time */
  	llist_add(&va->purge_list, &vmap_purge_list);
  
  	if (unlikely(nr_lazy > lazy_max_pages()))
  		try_purge_vmap_area_lazy();
  }
  /*
   * Free and unmap a vmap area
   */
  static void free_unmap_vmap_area(struct vmap_area *va)
  {
  	flush_cache_vunmap(va->va_start, va->va_end);
  	unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start);
  	if (debug_pagealloc_enabled_static())
  		flush_tlb_kernel_range(va->va_start, va->va_end);
  	free_vmap_area_noflush(va);
  }
  static struct vmap_area *find_vmap_area(unsigned long addr)
  {
  	struct vmap_area *va;
  
  	spin_lock(&vmap_area_lock);
  	va = __find_vmap_area(addr);
  	spin_unlock(&vmap_area_lock);
  
  	return va;
  }
  /*** Per cpu kva allocator ***/
  
  /*
   * vmap space is limited especially on 32-bit architectures. Ensure there is
   * room for at least 16 percpu vmap blocks per CPU.
   */
  /*
   * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
   * to #define VMALLOC_SPACE		(VMALLOC_END-VMALLOC_START). Guess
   * instead (we just need a rough idea)
   */
  #if BITS_PER_LONG == 32
  #define VMALLOC_SPACE		(128UL*1024*1024)
  #else
  #define VMALLOC_SPACE		(128UL*1024*1024*1024)
  #endif
  
  #define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
  #define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
  #define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
  #define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
  #define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
  #define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
  #define VMAP_BBMAP_BITS		\
  		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
  		VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
  			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
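  
  /*
   * Worked example (assuming a 64-bit kernel, 4K pages, NR_CPUS == 64):
   * VMALLOC_PAGES / 64 / 16 == 32768, which the clamp above caps at
   * VMAP_BBMAP_BITS_MAX == 1024, so each vmap block covers 1024 * 4K == 4MB
   * of KVA while a single allocation is limited to VMAP_MAX_ALLOC == 64
   * pages (256K).
   */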
  
  #define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
  
  struct vmap_block_queue {
  	spinlock_t lock;
  	struct list_head free;
  };
  
  struct vmap_block {
  	spinlock_t lock;
  	struct vmap_area *va;
  	unsigned long free, dirty;
  	unsigned long dirty_min, dirty_max; /* dirty range */
  	struct list_head free_list;
  	struct rcu_head rcu_head;
  	struct list_head purge;
  };
  
  /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
  static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
  
  /*
   * XArray of vmap blocks, indexed by address, to quickly find a vmap block
   * in the free path. Could get rid of this if we change the API to return a
   * "cookie" from alloc, to be passed to free. But no big deal yet.
   */
  static DEFINE_XARRAY(vmap_blocks);
  
  /*
   * We should probably have a fallback mechanism to allocate virtual memory
   * out of partially filled vmap blocks. However, vmap block sizing should be
   * fairly reasonable according to the vmalloc size, so it shouldn't be a
   * big problem.
   */
  
  static unsigned long addr_to_vb_idx(unsigned long addr)
  {
  	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
  	addr /= VMAP_BLOCK_SIZE;
  	return addr;
  }
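  
  /*
   * Worked example (assuming VMALLOC_START is VMAP_BLOCK_SIZE-aligned and
   * a 4MB block size): every address inside one block yields the same
   * index, e.g. addr_to_vb_idx(VMALLOC_START + 4M + 12K) ==
   * addr_to_vb_idx(VMALLOC_START + 4M) == 1, which is what lets vb_free()
   * find the owning vmap_block with a single xa_load() lookup.
   */
  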
  static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
  {
  	unsigned long addr;
  
  	addr = va_start + (pages_off << PAGE_SHIFT);
  	BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
  	return (void *)addr;
  }
  
  /**
   * new_vmap_block - allocates a new vmap_block and occupies 2^order pages in this
   *                  block. Of course the number of pages can't exceed VMAP_BBMAP_BITS
   * @order:    how many 2^order pages should be occupied in newly allocated block
   * @gfp_mask: flags for the page level allocator
   *
   * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
   */
  static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
  	struct vmap_area *va;
  	unsigned long vb_idx;
  	int node, err;
  	void *vaddr;
  
  	node = numa_node_id();
  
  	vb = kmalloc_node(sizeof(struct vmap_block),
  			gfp_mask & GFP_RECLAIM_MASK, node);
  	if (unlikely(!vb))
  		return ERR_PTR(-ENOMEM);
  
  	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
  					VMALLOC_START, VMALLOC_END,
  					node, gfp_mask);
  	if (IS_ERR(va)) {
  		kfree(vb);
  		return ERR_CAST(va);
  	}
  	vaddr = vmap_block_vaddr(va->va_start, 0);
  	spin_lock_init(&vb->lock);
  	vb->va = va;
  	/* At least something should be left free */
  	BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
  	vb->free = VMAP_BBMAP_BITS - (1UL << order);
  	vb->dirty = 0;
  	vb->dirty_min = VMAP_BBMAP_BITS;
  	vb->dirty_max = 0;
  	INIT_LIST_HEAD(&vb->free_list);
  
  	vb_idx = addr_to_vb_idx(va->va_start);
  	err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask);
  	if (err) {
  		kfree(vb);
  		free_vmap_area(va);
  		return ERR_PTR(err);
  	}
  
  	vbq = &get_cpu_var(vmap_block_queue);
  	spin_lock(&vbq->lock);
  	list_add_tail_rcu(&vb->free_list, &vbq->free);
  	spin_unlock(&vbq->lock);
  	put_cpu_var(vmap_block_queue);

  	return vaddr;
  }
  static void free_vmap_block(struct vmap_block *vb)
  {
  	struct vmap_block *tmp;

  	tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
  	BUG_ON(tmp != vb);
  	free_vmap_area_noflush(vb->va);
  	kfree_rcu(vb, rcu_head);
  }
  static void purge_fragmented_blocks(int cpu)
  {
  	LIST_HEAD(purge);
  	struct vmap_block *vb;
  	struct vmap_block *n_vb;
  	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  
  	rcu_read_lock();
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  
  		if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
  			continue;
  
  		spin_lock(&vb->lock);
  		if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
  			vb->free = 0; /* prevent further allocs after releasing lock */
  			vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
  			vb->dirty_min = 0;
  			vb->dirty_max = VMAP_BBMAP_BITS;
  			spin_lock(&vbq->lock);
  			list_del_rcu(&vb->free_list);
  			spin_unlock(&vbq->lock);
  			spin_unlock(&vb->lock);
  			list_add_tail(&vb->purge, &purge);
  		} else
  			spin_unlock(&vb->lock);
  	}
  	rcu_read_unlock();
  
  	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
  		list_del(&vb->purge);
  		free_vmap_block(vb);
  	}
  }
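  
  /*
   * The unlocked test in the loop above is only an optimistic pre-check;
   * the same condition is re-evaluated under vb->lock before the block is
   * tagged for purging, so a racing vb_alloc()/vb_free() cannot be lost.
   * The pattern, sketched (illustrative only):
   *
   *	if (!condition(obj))		lockless, may race
   *		continue;
   *	spin_lock(&obj->lock);
   *	if (condition(obj))		authoritative re-check
   *		tag_for_purge(obj);
   *	spin_unlock(&obj->lock);
   */
  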
  static void purge_fragmented_blocks_allcpus(void)
  {
  	int cpu;
  
  	for_each_possible_cpu(cpu)
  		purge_fragmented_blocks(cpu);
  }
  static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  {
  	struct vmap_block_queue *vbq;
  	struct vmap_block *vb;
  	void *vaddr = NULL;
  	unsigned int order;
  	BUG_ON(offset_in_page(size));
  	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  	if (WARN_ON(size == 0)) {
  		/*
  		 * Allocating 0 bytes isn't what the caller wants since
  		 * get_order(0) returns a funny result. Just warn and terminate
  		 * early.
  		 */
  		return NULL;
  	}
  	order = get_order(size);
  	rcu_read_lock();
  	vbq = &get_cpu_var(vmap_block_queue);
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  		unsigned long pages_off;
  
  		spin_lock(&vb->lock);
  		if (vb->free < (1UL << order)) {
  			spin_unlock(&vb->lock);
  			continue;
  		}

  		pages_off = VMAP_BBMAP_BITS - vb->free;
  		vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
  		vb->free -= 1UL << order;
  		if (vb->free == 0) {
  			spin_lock(&vbq->lock);
  			list_del_rcu(&vb->free_list);
  			spin_unlock(&vbq->lock);
  		}

  		spin_unlock(&vb->lock);
  		break;
  	}

  	put_cpu_var(vmap_block_queue);
  	rcu_read_unlock();
  	/* Allocate new block if nothing was found */
  	if (!vaddr)
  		vaddr = new_vmap_block(order, gfp_mask);

  	return vaddr;
  }
  static void vb_free(unsigned long addr, unsigned long size)
  {
  	unsigned long offset;
  	unsigned int order;
  	struct vmap_block *vb;
  	BUG_ON(offset_in_page(size));
  	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);

  	flush_cache_vunmap(addr, addr + size);

  	order = get_order(size);
  	offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
  	vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));

  	unmap_kernel_range_noflush(addr, size);

  	if (debug_pagealloc_enabled_static())
  		flush_tlb_kernel_range(addr, addr + size);

  	spin_lock(&vb->lock);
  
  	/* Expand dirty range */
  	vb->dirty_min = min(vb->dirty_min, offset);
  	vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));

  	vb->dirty += 1UL << order;
  	if (vb->dirty == VMAP_BBMAP_BITS) {
  		BUG_ON(vb->free);
  		spin_unlock(&vb->lock);
  		free_vmap_block(vb);
  	} else
  		spin_unlock(&vb->lock);
  }
  static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
  {
  	int cpu;

  	if (unlikely(!vmap_initialized))
  		return;
  	might_sleep();
  	for_each_possible_cpu(cpu) {
  		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
  		struct vmap_block *vb;
  
  		rcu_read_lock();
  		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  			spin_lock(&vb->lock);
  			if (vb->dirty) {
  				unsigned long va_start = vb->va->va_start;
  				unsigned long s, e;

  				s = va_start + (vb->dirty_min << PAGE_SHIFT);
  				e = va_start + (vb->dirty_max << PAGE_SHIFT);

  				start = min(s, start);
  				end   = max(e, end);

  				flush = 1;
  			}
  			spin_unlock(&vb->lock);
  		}
  		rcu_read_unlock();
  	}
  	mutex_lock(&vmap_purge_lock);
  	purge_fragmented_blocks_allcpus();
  	if (!__purge_vmap_area_lazy(start, end) && flush)
  		flush_tlb_kernel_range(start, end);
  	mutex_unlock(&vmap_purge_lock);
  }
  
  /**
   * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
   *
   * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
   * to amortize TLB flushing overheads. What this means is that any page you
   * have now may, in a former life, have been mapped into a kernel virtual
   * address by the vmap layer, so there might be some CPUs with TLB entries
   * still referencing that page (additional to the regular 1:1 kernel mapping).
   *
   * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
   * be sure that none of the pages we have control over will have any aliases
   * from the vmap layer.
   */
  void vm_unmap_aliases(void)
  {
  	unsigned long start = ULONG_MAX, end = 0;
  	int flush = 0;
  
  	_vm_unmap_aliases(start, end, flush);
  }
  EXPORT_SYMBOL_GPL(vm_unmap_aliases);
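  
  /*
   * Illustrative sketch, not part of this file: code changing direct-map
   * permissions for pages it previously vmapped would flush lazy aliases
   * first, so no stale writable TLB entry outlives the permission change
   * ("addr" and "nr_pages" are hypothetical):
   *
   *	vunmap(vaddr);
   *	vm_unmap_aliases();
   *	set_memory_ro(addr, nr_pages);
   */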
  
  /**
   * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
   * @mem: the pointer returned by vm_map_ram
   * @count: the count passed to that vm_map_ram call (cannot unmap partial)
   */
  void vm_unmap_ram(const void *mem, unsigned int count)
  {
  	unsigned long size = (unsigned long)count << PAGE_SHIFT;
  	unsigned long addr = (unsigned long)mem;
  	struct vmap_area *va;

  	might_sleep();
  	BUG_ON(!addr);
  	BUG_ON(addr < VMALLOC_START);
  	BUG_ON(addr > VMALLOC_END);
  	BUG_ON(!PAGE_ALIGNED(addr));

  	kasan_poison_vmalloc(mem, size);
  	if (likely(count <= VMAP_MAX_ALLOC)) {
  		debug_check_no_locks_freed(mem, size);
  		vb_free(addr, size);
  		return;
  	}
  
  	va = find_vmap_area(addr);
  	BUG_ON(!va);
  	debug_check_no_locks_freed((void *)va->va_start,
  				    (va->va_end - va->va_start));
  	free_unmap_vmap_area(va);
  }
  EXPORT_SYMBOL(vm_unmap_ram);
  
  /**
   * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
   * @pages: an array of pointers to the pages to be mapped
   * @count: number of pages
   * @node: prefer to allocate data structures on this node
   *
   * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
   * faster than vmap so it's good.  But if you mix long-life and short-life
   * objects with vm_map_ram(), it could consume lots of address space through
   * fragmentation (especially on a 32-bit machine).  You could see failures in
   * the end.  Please use this function for short-lived objects.
   *
   * Returns: a pointer to the address that has been mapped, or %NULL on failure
   */
  void *vm_map_ram(struct page **pages, unsigned int count, int node)
  {
  	unsigned long size = (unsigned long)count << PAGE_SHIFT;
  	unsigned long addr;
  	void *mem;
  
  	if (likely(count <= VMAP_MAX_ALLOC)) {
  		mem = vb_alloc(size, GFP_KERNEL);
  		if (IS_ERR(mem))
  			return NULL;
  		addr = (unsigned long)mem;
  	} else {
  		struct vmap_area *va;
  		va = alloc_vmap_area(size, PAGE_SIZE,
  				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
  		if (IS_ERR(va))
  			return NULL;
  
  		addr = va->va_start;
  		mem = (void *)addr;
  	}
  
  	kasan_unpoison_vmalloc(mem, size);
  	if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) {
  		vm_unmap_ram(mem, count);
  		return NULL;
  	}
  	return mem;
  }
  EXPORT_SYMBOL(vm_map_ram);
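  
  /*
   * Illustrative sketch, not part of this file: a driver mapping a small,
   * short-lived scatter of pages would pair the two calls like this
   * ("pages", "nr" and "src" are hypothetical):
   *
   *	void *buf = vm_map_ram(pages, nr, NUMA_NO_NODE);
   *	if (!buf)
   *		return -ENOMEM;
   *	memcpy(buf, src, nr * PAGE_SIZE);
   *	vm_unmap_ram(buf, nr);
   *
   * @count must match between the two calls; partial unmaps are not
   * supported.
   */
  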
  static struct vm_struct *vmlist __initdata;

  /**
   * vm_area_add_early - add vmap area early during boot
   * @vm: vm_struct to add
   *
   * This function is used to add a fixed kernel vm area to vmlist before
   * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags
   * should contain proper values and the other fields should be zero.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
  void __init vm_area_add_early(struct vm_struct *vm)
  {
  	struct vm_struct *tmp, **p;
  
  	BUG_ON(vmap_initialized);
  	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
  		if (tmp->addr >= vm->addr) {
  			BUG_ON(tmp->addr < vm->addr + vm->size);
  			break;
  		} else
  			BUG_ON(tmp->addr + tmp->size > vm->addr);
  	}
  	vm->next = *p;
  	*p = vm;
  }
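  
  /*
   * Illustrative sketch, not part of this file: early arch code would
   * typically fill in a static vm_struct and add it ("early_vm",
   * EARLY_ADDR and EARLY_SIZE are hypothetical):
   *
   *	static struct vm_struct early_vm;
   *
   *	early_vm.addr  = (void *)EARLY_ADDR;
   *	early_vm.size  = EARLY_SIZE;
   *	early_vm.flags = VM_IOREMAP;
   *	vm_area_add_early(&early_vm);
   *
   * The insertion loop above keeps vmlist sorted and BUGs on overlap.
   */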
  
  /**
   * vm_area_register_early - register vmap area early during boot
   * @vm: vm_struct to register
   * @align: requested alignment
   *
   * This function is used to register a kernel vm area before
   * vmalloc_init() is called.  @vm->size and @vm->flags should contain
   * proper values on entry and other fields should be zero.  On return,
   * vm->addr contains the allocated address.
   *
   * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
   */
  void __init vm_area_register_early(struct vm_struct *vm, size_t align)
  {
  	static size_t vm_init_off __initdata;
  	unsigned long addr;
  
  	addr = ALIGN(VMALLOC_START + vm_init_off, align);
  	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;

  	vm->addr = (void *)addr;

  	vm_area_add_early(vm);
  }
  static void vmap_init_free_space(void)
  {
  	unsigned long vmap_start = 1;
  	const unsigned long vmap_end = ULONG_MAX;
  	struct vmap_area *busy, *free;
  
  	/*
  	 *     B     F     B     B     B     F
  	 * -|-----|.....|-----|-----|-----|.....|-
  	 *  |           The KVA space           |
  	 *  |<--------------------------------->|
  	 */
  	list_for_each_entry(busy, &vmap_area_list, list) {
  		if (busy->va_start - vmap_start > 0) {
  			free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  			if (!WARN_ON_ONCE(!free)) {
  				free->va_start = vmap_start;
  				free->va_end = busy->va_start;
  
  				insert_vmap_area_augment(free, NULL,
  					&free_vmap_area_root,
  						&free_vmap_area_list);
  			}
  		}
  
  		vmap_start = busy->va_end;
  	}
  
  	if (vmap_end - vmap_start > 0) {
  		free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (!WARN_ON_ONCE(!free)) {
  			free->va_start = vmap_start;
  			free->va_end = vmap_end;
  
  			insert_vmap_area_augment(free, NULL,
  				&free_vmap_area_root,
  					&free_vmap_area_list);
  		}
  	}
  }
  void __init vmalloc_init(void)
  {
  	struct vmap_area *va;
  	struct vm_struct *tmp;
  	int i;
  	/*
  	 * Create the cache for vmap_area objects.
  	 */
  	vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
  	for_each_possible_cpu(i) {
  		struct vmap_block_queue *vbq;
  		struct vfree_deferred *p;
  
  		vbq = &per_cpu(vmap_block_queue, i);
  		spin_lock_init(&vbq->lock);
  		INIT_LIST_HEAD(&vbq->free);
  		p = &per_cpu(vfree_deferred, i);
  		init_llist_head(&p->list);
  		INIT_WORK(&p->wq, free_work);
  	}

  	/* Import existing vmlist entries. */
  	for (tmp = vmlist; tmp; tmp = tmp->next) {
  		va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
  		if (WARN_ON_ONCE(!va))
  			continue;
  		va->va_start = (unsigned long)tmp->addr;
  		va->va_end = va->va_start + tmp->size;
  		va->vm = tmp;
  		insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
  	}

  	/*
  	 * Now we can initialize a free vmap space.
  	 */
  	vmap_init_free_space();
  	vmap_initialized = true;
  }
  /**
   * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
   * @addr: start of the VM area to unmap
   * @size: size of the VM area to unmap
   *
   * Similar to unmap_kernel_range_noflush() but flushes the vcache before
   * the unmapping and the TLB after.
   */
  void unmap_kernel_range(unsigned long addr, unsigned long size)
  {
  	unsigned long end = addr + size;
  
  	flush_cache_vunmap(addr, end);
  	unmap_kernel_range_noflush(addr, size);
  	flush_tlb_kernel_range(addr, end);
  }
  static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
  	struct vmap_area *va, unsigned long flags, const void *caller)
  {
  	vm->flags = flags;
  	vm->addr = (void *)va->va_start;
  	vm->size = va->va_end - va->va_start;
  	vm->caller = caller;
  	va->vm = vm;
  }
  
  static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
  			      unsigned long flags, const void *caller)
  {
  	spin_lock(&vmap_area_lock);
  	setup_vmalloc_vm_locked(vm, va, flags, caller);
  	spin_unlock(&vmap_area_lock);
  }

  static void clear_vm_uninitialized_flag(struct vm_struct *vm)
  {
  	/*
  	 * Before removing VM_UNINITIALIZED,
  	 * we should make sure that vm has proper values.
  	 * Pair with smp_rmb() in show_numa_info().
  	 */
  	smp_wmb();
  	vm->flags &= ~VM_UNINITIALIZED;
  }
  static struct vm_struct *__get_vm_area_node(unsigned long size,
  		unsigned long align, unsigned long flags, unsigned long start,
  		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
  {
  	struct vmap_area *va;
  	struct vm_struct *area;
  	unsigned long requested_size = size;

  	BUG_ON(in_interrupt());
  	size = PAGE_ALIGN(size);
  	if (unlikely(!size))
  		return NULL;

  	if (flags & VM_IOREMAP)
  		align = 1ul << clamp_t(int, get_count_order_long(size),
  				       PAGE_SHIFT, IOREMAP_MAX_ORDER);
  	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
  	if (unlikely(!area))
  		return NULL;
  	if (!(flags & VM_NO_GUARD))
  		size += PAGE_SIZE;

  	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
  	if (IS_ERR(va)) {
  		kfree(area);
  		return NULL;
  	}

  	kasan_unpoison_vmalloc((void *)va->va_start, requested_size);

  	setup_vmalloc_vm(area, va, flags, caller);

  	return area;
  }
  struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  				       unsigned long start, unsigned long end,
  				       const void *caller)
  {
  	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
  				  GFP_KERNEL, caller);
  }
  EXPORT_SYMBOL_GPL(__get_vm_area_caller);

  /**
   * get_vm_area - reserve a contiguous kernel virtual area
   * @size:	 size of the area
   * @flags:	 %VM_IOREMAP for I/O mappings or VM_ALLOC
   *
   * Search an area of @size in the kernel virtual mapping area,
   * and reserve it for our purposes.  Returns the area descriptor
   * on success or %NULL on failure.
   *
   * Return: the area descriptor on success or %NULL on failure.
   */
  struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
  {
  	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
  				  NUMA_NO_NODE, GFP_KERNEL,
  				  __builtin_return_address(0));
  }
  
  struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
  				const void *caller)
  {
  	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
  				  NUMA_NO_NODE, GFP_KERNEL, caller);
  }
  /**
   * find_vm_area - find a contiguous kernel virtual area
   * @addr:	  base address
   *
   * Search for the kernel VM area starting at @addr, and return it.
   * It is up to the caller to do all required locking to keep the returned
   * pointer valid.
   *
   * Return: the area descriptor on success or %NULL on failure.
   */
  struct vm_struct *find_vm_area(const void *addr)
  {
  	struct vmap_area *va;

  	va = find_vmap_area((unsigned long)addr);
  	if (!va)
  		return NULL;

  	return va->vm;
  }
  /**
   * remove_vm_area - find and remove a contiguous kernel virtual area
   * @addr:	    base address
   *
   * Search for the kernel VM area starting at @addr, and remove it.
   * This function returns the found VM area, but using it is NOT safe
   * on SMP machines, except for its size or flags.
   *
   * Return: the area descriptor on success or %NULL on failure.
   */
  struct vm_struct *remove_vm_area(const void *addr)
  {
  	struct vmap_area *va;
  	might_sleep();
  	spin_lock(&vmap_area_lock);
  	va = __find_vmap_area((unsigned long)addr);
  	if (va && va->vm) {
  		struct vm_struct *vm = va->vm;

  		va->vm = NULL;
  		spin_unlock(&vmap_area_lock);
  		kasan_free_shadow(vm);
  		free_unmap_vmap_area(va);

  		return vm;
  	}
  
  	spin_unlock(&vmap_area_lock);
  	return NULL;
  }
  static inline void set_area_direct_map(const struct vm_struct *area,
  				       int (*set_direct_map)(struct page *page))
  {
  	int i;
  
  	for (i = 0; i < area->nr_pages; i++)
  		if (page_address(area->pages[i]))
  			set_direct_map(area->pages[i]);
  }
  
  /* Handle removing and resetting vm mappings related to the vm_struct. */
  static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
  {
  	unsigned long start = ULONG_MAX, end = 0;
  	int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
  	int flush_dmap = 0;
  	int i;
  	remove_vm_area(area->addr);
  
  	/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
  	if (!flush_reset)
  		return;
  
  	/*
  	 * If not deallocating pages, just do the flush of the VM area and
  	 * return.
  	 */
  	if (!deallocate_pages) {
  		vm_unmap_aliases();
  		return;
  	}
  
  	/*
  	 * If execution gets here, flush the vm mapping and reset the direct
  	 * map. Find the start and end range of the direct mappings to make sure
  	 * the vm_unmap_aliases() flush includes the direct map.
  	 */
  	for (i = 0; i < area->nr_pages; i++) {
  		unsigned long addr = (unsigned long)page_address(area->pages[i]);
  		if (addr) {
  			start = min(addr, start);
  			end = max(addr + PAGE_SIZE, end);
  			flush_dmap = 1;
  		}
  	}
  
  	/*
  	 * Set direct map to something invalid so that it won't be cached if
  	 * there are any accesses after the TLB flush, then flush the TLB and
  	 * reset the direct map permissions to the default.
  	 */
  	set_area_direct_map(area, set_direct_map_invalid_noflush);
  	_vm_unmap_aliases(start, end, flush_dmap);
  	set_area_direct_map(area, set_direct_map_default_noflush);
  }
  static void __vunmap(const void *addr, int deallocate_pages)
  {
  	struct vm_struct *area;
  
  	if (!addr)
  		return;
  	if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
  			addr))
  		return;

  	area = find_vm_area(addr);
  	if (unlikely(!area)) {
  		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
  				addr);
  		return;
  	}
  	debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
  	debug_check_no_obj_freed(area->addr, get_vm_area_size(area));

  	kasan_poison_vmalloc(area->addr, get_vm_area_size(area));

  	vm_remove_mappings(area, deallocate_pages);
  	if (deallocate_pages) {
  		int i;
  
  		for (i = 0; i < area->nr_pages; i++) {
  			struct page *page = area->pages[i];
  
  			BUG_ON(!page);
  			__free_pages(page, 0);
  		}
  		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);

  		kvfree(area->pages);
  	}
  
  	kfree(area);
  	return;
  }
  
  static inline void __vfree_deferred(const void *addr)
  {
  	/*
  	 * Use raw_cpu_ptr() because this can be called from preemptible
  	 * context. Preemption is absolutely fine here, because the llist_add()
  	 * implementation is lockless, so it works even if we are adding to
  	 * another cpu's list. schedule_work() should be fine with this too.
  	 */
  	struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
  
  	if (llist_add((struct llist_node *)addr, &p->list))
  		schedule_work(&p->wq);
  }
  
  /**
   * vfree_atomic - release memory allocated by vmalloc()
   * @addr:	  memory base address
   *
   * This one is just like vfree() but can be called in any atomic context
   * except NMIs.
   */
  void vfree_atomic(const void *addr)
  {
  	BUG_ON(in_nmi());
  
  	kmemleak_free(addr);
  
  	if (!addr)
  		return;
  	__vfree_deferred(addr);
  }
  static void __vfree(const void *addr)
  {
  	if (unlikely(in_interrupt()))
  		__vfree_deferred(addr);
  	else
  		__vunmap(addr, 1);
  }
  /**
   * vfree - Release memory allocated by vmalloc()
   * @addr:  Memory base address
   *
   * Free the virtually continuous memory area starting at @addr, as obtained
   * from one of the vmalloc() family of APIs.  This will usually also free the
   * physical memory underlying the virtual allocation, but that memory is
   * reference counted, so it will not be freed until the last user goes away.
   *
   * If @addr is NULL, no operation is performed.
   *
   * Context:
   * May sleep if called *not* from interrupt context.
   * Must not be called in NMI context (strictly speaking, it could be
   * if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
   * conventions for vfree() arch-dependent would be a really bad idea).
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2246
   */
b3bdda02a   Christoph Lameter   vmalloc: add cons...
2247
  void vfree(const void *addr)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2248
  {
32fcfd407   Al Viro   make vfree() safe...
2249
  	BUG_ON(in_nmi());
89219d37a   Catalin Marinas   kmemleak: Add the...
2250
2251
  
  	kmemleak_free(addr);
a8dda165e   Andrey Ryabinin   vfree: add debug ...
2252
  	might_sleep_if(!in_interrupt());
32fcfd407   Al Viro   make vfree() safe...
2253
2254
  	if (!addr)
  		return;
c67dc6247   Roman Penyaev   mm/vmalloc: do no...
2255
2256
  
  	__vfree(addr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2257
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2258
2259
2260
  EXPORT_SYMBOL(vfree);
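
/*
 * Illustrative sketch (not part of the original file): the usual
 * allocate/use/free lifecycle.  The buffer is virtually contiguous
 * but may be physically scattered, so it must not be handed to code
 * that requires physically contiguous memory.
 */
static int __maybe_unused vmalloc_lifecycle_example(void)
{
	size_t len = 64 * 1024;
	char *buf = vmalloc(len);

	if (!buf)
		return -ENOMEM;

	memset(buf, 0, len);	/* use the buffer */
	vfree(buf);		/* may sleep, see the Context note above */
	return 0;
}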
  
/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr:   memory base address
 *
 * Free the virtually contiguous memory area starting at @addr,
 * which was created from the page array passed to vmap().
 *
 * Must not be called in interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	if (addr)
		__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);
  
/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual space.
 * If @flags contains %VM_MAP_PUT_PAGES the ownership of the pages array itself
 * (which must be kmalloc or vmalloc memory) and one reference per page in it
 * are transferred from the caller to vmap(), and will be freed / dropped when
 * vfree() is called on the return value.
 *
 * Return: the address of the area or %NULL on failure
 */
void *vmap(struct page **pages, unsigned int count,
	   unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;
	unsigned long size;		/* In bytes */

	might_sleep();

	if (count > totalram_pages())
		return NULL;

	size = (unsigned long)count << PAGE_SHIFT;
	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_kernel_range((unsigned long)area->addr, size, pgprot_nx(prot),
			pages) < 0) {
		vunmap(area->addr);
		return NULL;
	}

	if (flags & VM_MAP_PUT_PAGES) {
		area->pages = pages;
		area->nr_pages = count;
	}
	return area->addr;
}
EXPORT_SYMBOL(vmap);
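
/*
 * Illustrative sketch (hypothetical helper, not in the original file):
 * stitch individually allocated pages into one contiguous kernel
 * mapping.  With plain %VM_MAP (no %VM_MAP_PUT_PAGES) the caller keeps
 * ownership of the pages and the array, so after vunmap() the pages
 * must still be freed by hand.
 */
static void * __maybe_unused vmap_pages_example(struct page **pages,
						unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto fail;
	}
	return vmap(pages, count, VM_MAP, PAGE_KERNEL);
fail:
	while (i--)
		__free_page(pages[i]);
	return NULL;
}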
  #ifdef CONFIG_VMAP_PFN
  struct vmap_pfn_data {
  	unsigned long	*pfns;
  	pgprot_t	prot;
  	unsigned int	idx;
  };
  
  static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
  {
  	struct vmap_pfn_data *data = private;
  
  	if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
  		return -EINVAL;
  	*pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
  	return 0;
  }
  
  /**
   * vmap_pfn - map an array of PFNs into virtually contiguous space
   * @pfns: array of PFNs
   * @count: number of pages to map
   * @prot: page protection for the mapping
   *
   * Maps @count PFNs from @pfns into contiguous kernel virtual space and returns
   * the start address of the mapping.
   */
  void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
  {
  	struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
  	struct vm_struct *area;
  
  	area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
  			__builtin_return_address(0));
  	if (!area)
  		return NULL;
  	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
  			count * PAGE_SIZE, vmap_pfn_apply, &data)) {
  		free_vm_area(area);
  		return NULL;
  	}
  	return area->addr;
  }
  EXPORT_SYMBOL_GPL(vmap_pfn);
  #endif /* CONFIG_VMAP_PFN */
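
#ifdef CONFIG_VMAP_PFN
/*
 * Illustrative sketch (hypothetical values): vmap_pfn() is intended for
 * PFNs that have no struct page backing them (vmap_pfn_apply() above
 * rejects pfn_valid() PFNs), e.g. device memory a driver discovered.
 * The base PFN handed in is an assumption for illustration only.
 */
static void * __maybe_unused vmap_device_pfns_example(unsigned long base_pfn)
{
	unsigned long pfns[4];
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(pfns); i++)
		pfns[i] = base_pfn + i;		/* must not be pfn_valid() */

	return vmap_pfn(pfns, ARRAY_SIZE(pfns), PAGE_KERNEL);
}
#endif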

static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node)
{
	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
	unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
	unsigned int array_size = nr_pages * sizeof(struct page *), i;
	struct page **pages;

	gfp_mask |= __GFP_NOWARN;
	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
		gfp_mask |= __GFP_HIGHMEM;

	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, nested_gfp, node,
					area->caller);
	} else {
		pages = kmalloc_node(array_size, nested_gfp, node);
	}

	if (!pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	area->pages = pages;
	area->nr_pages = nr_pages;

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node == NUMA_NO_NODE)
			page = alloc_page(gfp_mask);
		else
			page = alloc_pages_node(node, gfp_mask, 0);

		if (unlikely(!page)) {
			/* Successfully allocated i pages, free them in __vfree() */
			area->nr_pages = i;
			atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
			goto fail;
		}
		area->pages[i] = page;
		if (gfpflags_allow_blocking(gfp_mask))
			cond_resched();
	}
	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);

	if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
			prot, pages) < 0)
		goto fail;

	return area->addr;

fail:
	warn_alloc(gfp_mask, NULL,
			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
			  (area->nr_pages*PAGE_SIZE), area->size);
	__vfree(area->addr);
	return NULL;
}
  
/**
 * __vmalloc_node_range - allocate virtually contiguous memory
 * @size:		  allocation size
 * @align:		  desired alignment
 * @start:		  vm area range start
 * @end:		  vm area range end
 * @gfp_mask:		  flags for the page level allocator
 * @prot:		  protection mask for the allocated pages
 * @vm_flags:		  additional vm area flags (e.g. %VM_NO_GUARD)
 * @node:		  node to use for allocation or NUMA_NO_NODE
 * @caller:		  caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags.  Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 *
 * Return: the address of the area or %NULL on failure
 */
void *__vmalloc_node_range(unsigned long size, unsigned long align,
			unsigned long start, unsigned long end, gfp_t gfp_mask,
			pgprot_t prot, unsigned long vm_flags, int node,
			const void *caller)
{
	struct vm_struct *area;
	void *addr;
	unsigned long real_size = size;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > totalram_pages())
		goto fail;

	area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
				vm_flags, start, end, node, gfp_mask, caller);
	if (!area)
		goto fail;

	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
	if (!addr)
		return NULL;

	/*
	 * A newly allocated vm_struct carries the VM_UNINITIALIZED flag,
	 * meaning it is not yet fully set up. At this point it is fully
	 * initialized, so remove the flag here.
	 */
	clear_vm_uninitialized_flag(area);

	kmemleak_vmalloc(area, size, gfp_mask);

	return addr;

fail:
	warn_alloc(gfp_mask, NULL,
			  "vmalloc: allocation failure: %lu bytes", real_size);
	return NULL;
}

/**
 * __vmalloc_node - allocate virtually contiguous memory
 * @size:	    allocation size
 * @align:	    desired alignment
 * @gfp_mask:	    flags for the page level allocator
 * @node:	    node to use for allocation or NUMA_NO_NODE
 * @caller:	    caller's return address
 *
 * Allocate enough pages to cover @size from the page level allocator with
 * @gfp_mask flags.  Map them into contiguous kernel virtual space.
 *
 * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
 * and __GFP_NOFAIL are not supported.
 *
 * Any use of gfp flags outside of GFP_KERNEL should be discussed
 * with the mm people.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, int node, const void *caller)
{
	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
				gfp_mask, PAGE_KERNEL, 0, node, caller);
}
/*
 * This export is only for performance analysis and stress testing of
 * vmalloc. It is required by the vmalloc test module; do not use it
 * for anything else.
 */
#ifdef CONFIG_TEST_VMALLOC_MODULE
EXPORT_SYMBOL_GPL(__vmalloc_node);
#endif

void *__vmalloc(unsigned long size, gfp_t gfp_mask)
{
	return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);
  
/**
 * vmalloc - allocate virtually contiguous memory
 * @size:    allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over the page level allocator and protection flags,
 * use __vmalloc() instead.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);

/**
 * vzalloc - allocate virtually contiguous memory with zero fill
 * @size:    allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over the page level allocator and protection flags,
 * use __vmalloc() instead.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vzalloc(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(vzalloc);
  
/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc_user(unsigned long size)
{
	return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
				    VM_USERMAP, NUMA_NO_NODE,
				    __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_user);
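
/*
 * Illustrative sketch (not part of the original file): buffers destined
 * for a userspace mapping should come from vmalloc_user(), which zeroes
 * the memory and sets %VM_USERMAP so that remap_vmalloc_range() further
 * below will accept the area.
 */
static void * __maybe_unused alloc_mmap_buffer_example(unsigned long size)
{
	return vmalloc_user(PAGE_ALIGN(size));
}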
  
/**
 * vmalloc_node - allocate memory on a specific node
 * @size:	  allocation size
 * @node:	  numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over the page level allocator and protection flags,
 * use __vmalloc() instead.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, 1, GFP_KERNEL, node,
			__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size:	allocation size
 * @node:	numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vzalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(vzalloc_node);

#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
/*
 * 64-bit systems should always have either DMA or DMA32 zones. For others
 * GFP_DMA32 should do the right thing and use the normal zone.
 */
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#endif

/**
 * vmalloc_32 - allocate virtually contiguous memory (32-bit addressable)
 * @size:	allocation size
 *
 * Allocate enough 32-bit physically addressable pages to cover @size from
 * the page level allocator and map them into contiguous kernel virtual space.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
			__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32-bit memory
 * @size:	     allocation size
 *
 * The resulting memory area is 32-bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc_32_user(unsigned long size)
{
	return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
				    GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
				    VM_USERMAP, NUMA_NO_NODE,
				    __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32_user);

/*
 * Small helper routine: copy contents from addr to buf.
 * If a page is not present, fill the gap with zeroes.
 */

static int aligned_vread(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = offset_in_page(addr);
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
		/*
		 * Safe access to this _mapped_ area would require a lock,
		 * but taking one here would add vmalloc()/vfree() overhead
		 * to this rarely used _debug_ interface. Instead we use
		 * kmap() and accept a small overhead in this access function.
		 */
		if (p) {
			/*
			 * We can expect USER0 is not used (see vread/vwrite's
			 * function description).
			 */
			void *map = kmap_atomic(p);
			memcpy(buf, map + offset, length);
			kunmap_atomic(map);
		} else
			memset(buf, 0, length);

		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}
  
static int aligned_vwrite(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = offset_in_page(addr);
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
		/*
		 * Safe access to this _mapped_ area would require a lock,
		 * but taking one here would add vmalloc()/vfree() overhead
		 * to this rarely used _debug_ interface. Instead we use
		 * kmap() and accept a small overhead in this access function.
		 */
		if (p) {
			/*
			 * We can expect USER0 is not used (see vread/vwrite's
			 * function description).
			 */
			void *map = kmap_atomic(p);
			memcpy(map + offset, buf, length);
			kunmap_atomic(map);
		}
		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}
  
/**
 * vread() - read vmalloc area in a safe way.
 * @buf:     buffer for reading data
 * @addr:    vm address.
 * @count:   number of bytes to be read.
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * copies data from that area to a given buffer. If the given memory range
 * of [addr...addr+count) includes some valid address, data is copied to
 * the proper area of @buf. If there are memory holes, they'll be zero-filled.
 * An IOREMAP area is treated as a memory hole and no copy is done.
 *
 * If [addr...addr+count) doesn't include any intersection with a live
 * vm_struct area, 0 is returned. @buf should be a kernel buffer.
 *
 * Note: In usual operation, vread() is never necessary because the caller
 * should know the vmalloc() area is valid and can use memcpy().
 * This is for routines which have to access the vmalloc area without
 * any information, such as /dev/kmem.
 *
 * Return: number of bytes for which addr and buf should be increased
 * (same number as @count) or %0 if [addr...addr+count) doesn't
 * include any intersection with a valid vmalloc area
 */
long vread(char *buf, char *addr, unsigned long count)
{
	struct vmap_area *va;
	struct vm_struct *vm;
	char *vaddr, *buf_start = buf;
	unsigned long buflen = count;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	spin_lock(&vmap_area_lock);
	list_for_each_entry(va, &vmap_area_list, list) {
		if (!count)
			break;
		if (!va->vm)
			continue;

		vm = va->vm;
		vaddr = (char *) vm->addr;
		if (addr >= vaddr + get_vm_area_size(vm))
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + get_vm_area_size(vm) - addr;
		if (n > count)
			n = count;
		if (!(vm->flags & VM_IOREMAP))
			aligned_vread(buf, addr, n);
		else /* IOREMAP area is treated as memory hole */
			memset(buf, 0, n);
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	spin_unlock(&vmap_area_lock);

	if (buf == buf_start)
		return 0;
	/* zero-fill memory holes */
	if (buf != buf_start + buflen)
		memset(buf, 0, buflen - (buf - buf_start));

	return buflen;
}

/**
 * vwrite() - write vmalloc area in a safe way.
 * @buf:      buffer for source data
 * @addr:     vm address.
 * @count:    number of bytes to be written.
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * copies data from a buffer to the given addr. If the specified range of
 * [addr...addr+count) includes some valid address, data is copied from
 * the proper area of @buf. If there are memory holes, no copy is done
 * for the hole. An IOREMAP area is treated as a memory hole and no copy
 * is done.
 *
 * If [addr...addr+count) doesn't include any intersection with a live
 * vm_struct area, 0 is returned. @buf should be a kernel buffer.
 *
 * Note: In usual operation, vwrite() is never necessary because the caller
 * should know the vmalloc() area is valid and can use memcpy().
 * This is for routines which have to access the vmalloc area without
 * any information, such as /dev/kmem.
 *
 * Return: number of bytes for which addr and buf should be
 * increased (same number as @count) or %0 if [addr...addr+count)
 * doesn't include any intersection with a valid vmalloc area
 */
long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vmap_area *va;
	struct vm_struct *vm;
	char *vaddr;
	unsigned long n, buflen;
	int copied = 0;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;
	buflen = count;

	spin_lock(&vmap_area_lock);
	list_for_each_entry(va, &vmap_area_list, list) {
		if (!count)
			break;
		if (!va->vm)
			continue;

		vm = va->vm;
		vaddr = (char *) vm->addr;
		if (addr >= vaddr + get_vm_area_size(vm))
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + get_vm_area_size(vm) - addr;
		if (n > count)
			n = count;
		if (!(vm->flags & VM_IOREMAP)) {
			aligned_vwrite(buf, addr, n);
			copied++;
		}
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	spin_unlock(&vmap_area_lock);
	if (!copied)
		return 0;
	return buflen;
}

/**
 * remap_vmalloc_range_partial - map vmalloc pages to userspace
 * @vma:		vma to cover
 * @uaddr:		target user address to start at
 * @kaddr:		virtual address of vmalloc kernel memory
 * @pgoff:		offset from @kaddr to start at
 * @size:		size of map area
 *
 * Returns:	0 for success, -Exxx on failure
 *
 * This function checks that @kaddr is a valid vmalloc'ed area,
 * and that it is big enough to cover the range starting at
 * @uaddr in @vma. Will return failure if those criteria aren't
 * met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c)
 */
int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
				void *kaddr, unsigned long pgoff,
				unsigned long size)
{
	struct vm_struct *area;
	unsigned long off;
	unsigned long end_index;

	if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
		return -EINVAL;

	size = PAGE_ALIGN(size);

	if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
		return -EINVAL;

	area = find_vm_area(kaddr);
	if (!area)
		return -EINVAL;

	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
		return -EINVAL;

	if (check_add_overflow(size, off, &end_index) ||
	    end_index > get_vm_area_size(area))
		return -EINVAL;
	kaddr += off;

	do {
		struct page *page = vmalloc_to_page(kaddr);
		int ret;

		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		kaddr += PAGE_SIZE;
		size -= PAGE_SIZE;
	} while (size > 0);

	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;

	return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range_partial);
  
/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma:		vma to cover (map full range of vma)
 * @addr:		vmalloc memory
 * @pgoff:		number of pages into addr before first page to map
 *
 * Returns:	0 for success, -Exxx on failure
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * that it is big enough to cover the vma. Will return failure if
 * those criteria aren't met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c)
 */
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
						unsigned long pgoff)
{
	return remap_vmalloc_range_partial(vma, vma->vm_start,
					   addr, pgoff,
					   vma->vm_end - vma->vm_start);
}
EXPORT_SYMBOL(remap_vmalloc_range);
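
/*
 * Illustrative sketch (hypothetical driver code, not in the original
 * file): the classic user of remap_vmalloc_range() is an mmap handler
 * exposing a buffer that was allocated with vmalloc_user() (which sets
 * %VM_USERMAP, required by the check in remap_vmalloc_range_partial()).
 * "example_buf" is an assumed module-level buffer set up elsewhere.
 */
static void *example_buf;	/* assumed: allocated via vmalloc_user() */

static int __maybe_unused example_mmap(struct file *file,
				       struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, example_buf, vma->vm_pgoff);
}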
  void free_vm_area(struct vm_struct *area)
  {
  	struct vm_struct *ret;
  	ret = remove_vm_area(area->addr);
  	BUG_ON(ret != area);
  	kfree(area);
  }
  EXPORT_SYMBOL_GPL(free_vm_area);

#ifdef CONFIG_SMP
static struct vmap_area *node_to_va(struct rb_node *n)
{
	return rb_entry_safe(n, struct vmap_area, rb_node);
}
  
/**
 * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
 * @addr: target address
 *
 * Returns: vmap_area if it is found. If there is no such area
 *   the first highest (in reverse order) vmap_area is returned,
 *   i.e. va->va_start < addr && va->va_end < addr, or NULL
 *   if there are no areas before @addr.
 */
static struct vmap_area *
pvm_find_va_enclose_addr(unsigned long addr)
{
	struct vmap_area *va, *tmp;
	struct rb_node *n;

	n = free_vmap_area_root.rb_node;
	va = NULL;

	while (n) {
		tmp = rb_entry(n, struct vmap_area, rb_node);
		if (tmp->va_start <= addr) {
			va = tmp;
			if (tmp->va_end >= addr)
				break;

			n = n->rb_right;
		} else {
			n = n->rb_left;
		}
	}

	return va;
}
  
/**
 * pvm_determine_end_from_reverse - find the highest aligned address
 * of free block below VMALLOC_END
 * @va:
 *   in - the VA we start the search (reverse order);
 *   out - the VA with the highest aligned end address.
 * @align: alignment the end address must satisfy
 *
 * Returns: determined end address within vmap_area
 */
static unsigned long
pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
{
	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	unsigned long addr;

	if (likely(*va)) {
		list_for_each_entry_from_reverse((*va),
				&free_vmap_area_list, list) {
			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
			if ((*va)->va_start < addr)
				return addr;
		}
	}

	return 0;
}
  
/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    vm_structs on success, %NULL on failure
 *
 * The percpu allocator wants to use congruent vm areas so that it can
 * maintain the offsets among percpu areas.  This function allocates
 * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
 * be scattered pretty far, the distance between two areas easily going
 * up to gigabytes.  To avoid interacting with regular vmallocs, these
 * areas are allocated from the top.
 *
 * Despite its complicated look, this allocator is rather simple. It
 * does everything top-down and scans free blocks from the end looking
 * for a matching base. While scanning, if any of the areas do not fit,
 * the base address is pulled down to fit the area. Scanning is repeated
 * till all the areas fit and then all necessary data structures are
 * inserted and the result is returned.
 */
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
				     const size_t *sizes, int nr_vms,
				     size_t align)
{
	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	struct vmap_area **vas, *va;
	struct vm_struct **vms;
	int area, area2, last_area, term_area;
	unsigned long base, start, size, end, last_end, orig_start, orig_end;
	bool purged = false;
	enum fit_type type;

	/* verify parameters and allocate data structures */
	BUG_ON(offset_in_page(align) || !is_power_of_2(align));
	for (last_area = 0, area = 0; area < nr_vms; area++) {
		start = offsets[area];
		end = start + sizes[area];

		/* is everything aligned properly? */
		BUG_ON(!IS_ALIGNED(offsets[area], align));
		BUG_ON(!IS_ALIGNED(sizes[area], align));

		/* detect the area with the highest address */
		if (start > offsets[last_area])
			last_area = area;

		for (area2 = area + 1; area2 < nr_vms; area2++) {
			unsigned long start2 = offsets[area2];
			unsigned long end2 = start2 + sizes[area2];

			BUG_ON(start2 < end && start < end2);
		}
	}
	last_end = offsets[last_area] + sizes[last_area];

	if (vmalloc_end - vmalloc_start < last_end) {
		WARN_ON(true);
		return NULL;
	}

	vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
	vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
	if (!vas || !vms)
		goto err_free2;
  
	for (area = 0; area < nr_vms; area++) {
		vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
		if (!vas[area] || !vms[area])
			goto err_free;
	}
retry:
	spin_lock(&free_vmap_area_lock);

	/* start scanning - we scan from the top, begin with the last area */
	area = term_area = last_area;
	start = offsets[area];
	end = start + sizes[area];

	va = pvm_find_va_enclose_addr(vmalloc_end);
	base = pvm_determine_end_from_reverse(&va, align) - end;

	while (true) {
		/*
		 * base might have underflowed, add last_end before
		 * comparing.
		 */
		if (base + last_end < vmalloc_start + last_end)
			goto overflow;

		/*
		 * Fitting base has not been found.
		 */
		if (va == NULL)
			goto overflow;

		/*
		 * If required width exceeds current VA block, move
		 * base downwards and then recheck.
		 */
		if (base + end > va->va_end) {
			base = pvm_determine_end_from_reverse(&va, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * If this VA does not fit, move base downwards and recheck.
		 */
		if (base + start < va->va_start) {
			va = node_to_va(rb_prev(&va->rb_node));
			base = pvm_determine_end_from_reverse(&va, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * This area fits, move on to the previous one.  If
		 * the previous one is the terminal one, we're done.
		 */
		area = (area + nr_vms - 1) % nr_vms;
		if (area == term_area)
			break;

		start = offsets[area];
		end = start + sizes[area];
		va = pvm_find_va_enclose_addr(base + end);
	}

	/* we've found a fitting base, insert all va's */
	for (area = 0; area < nr_vms; area++) {
		int ret;

		start = base + offsets[area];
		size = sizes[area];

		va = pvm_find_va_enclose_addr(start);
		if (WARN_ON_ONCE(va == NULL))
			/* It is a BUG(), but trigger recovery instead. */
			goto recovery;

		type = classify_va_fit_type(va, start, size);
		if (WARN_ON_ONCE(type == NOTHING_FIT))
			/* It is a BUG(), but trigger recovery instead. */
			goto recovery;

		ret = adjust_va_to_fit_type(va, start, size, type);
		if (unlikely(ret))
			goto recovery;

		/* Allocated area. */
		va = vas[area];
		va->va_start = start;
		va->va_end = start + size;
	}

	spin_unlock(&free_vmap_area_lock);

	/* populate the kasan shadow space */
	for (area = 0; area < nr_vms; area++) {
		if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
			goto err_free_shadow;

		kasan_unpoison_vmalloc((void *)vas[area]->va_start,
				       sizes[area]);
	}

	/* insert all vm's */
	spin_lock(&vmap_area_lock);
	for (area = 0; area < nr_vms; area++) {
		insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);

		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
				 pcpu_get_vm_areas);
	}
	spin_unlock(&vmap_area_lock);

	kfree(vas);
	return vms;

recovery:
	/*
	 * Remove previously allocated areas. There is no
	 * need to remove these areas from the busy tree,
	 * because they are inserted only on the final step
	 * and only when pcpu_get_vm_areas() succeeds.
	 */
	while (area--) {
		orig_start = vas[area]->va_start;
		orig_end = vas[area]->va_end;
		va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
					    &free_vmap_area_list);
		if (va)
			kasan_release_vmalloc(orig_start, orig_end,
				va->va_start, va->va_end);
		vas[area] = NULL;
	}
  
  overflow:
	spin_unlock(&free_vmap_area_lock);
  	if (!purged) {
  		purge_vmap_area_lazy();
  		purged = true;
  
  		/* Before "retry", check if we recover. */
  		for (area = 0; area < nr_vms; area++) {
  			if (vas[area])
  				continue;
  
  			vas[area] = kmem_cache_zalloc(
  				vmap_area_cachep, GFP_KERNEL);
  			if (!vas[area])
  				goto err_free;
  		}
  
  		goto retry;
  	}

err_free:
	for (area = 0; area < nr_vms; area++) {
		if (vas[area])
			kmem_cache_free(vmap_area_cachep, vas[area]);

		kfree(vms[area]);
	}
err_free2:
	kfree(vas);
	kfree(vms);
	return NULL;
253a496d8   Daniel Axtens   kasan: don't assu...
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
  
  err_free_shadow:
  	spin_lock(&free_vmap_area_lock);
  	/*
  	 * We release all the vmalloc shadows, even the ones for regions
  	 * that were not successfully added. This relies on
  	 * kasan_release_vmalloc being able to tolerate this case.
  	 */
  	for (area = 0; area < nr_vms; area++) {
  		orig_start = vas[area]->va_start;
  		orig_end = vas[area]->va_end;
  		va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
  					    &free_vmap_area_list);
  		if (va)
  			kasan_release_vmalloc(orig_start, orig_end,
  				va->va_start, va->va_end);
  		vas[area] = NULL;
  		kfree(vms[area]);
  	}
  	spin_unlock(&free_vmap_area_lock);
  	kfree(vas);
  	kfree(vms);
  	return NULL;
  }
  
  /**
   * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
   * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
   * @nr_vms: the number of allocated areas
   *
   * Free vm_structs and the array allocated by pcpu_get_vm_areas().
   */
  void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
  {
  	int i;
  
  	for (i = 0; i < nr_vms; i++)
  		free_vm_area(vms[i]);
  	kfree(vms);
  }
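
  /*
   * Editor's illustrative sketch (not part of the original source): how a
   * caller such as the percpu allocator might pair pcpu_get_vm_areas()
   * with pcpu_free_vm_areas(). The offsets, sizes and alignment below are
   * made up; see mm/percpu.c for the real user.
   *
   *	static const unsigned long offsets[] = { 0, SZ_1M };
   *	static const size_t sizes[] = { SZ_64K, SZ_64K };
   *	struct vm_struct **vms;
   *
   *	vms = pcpu_get_vm_areas(offsets, sizes, ARRAY_SIZE(sizes), PAGE_SIZE);
   *	if (!vms)
   *		return -ENOMEM;
   *	...use the areas; vms[i]->addr is the base of area i...
   *	pcpu_free_vm_areas(vms, ARRAY_SIZE(sizes));
   */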
  #endif	/* CONFIG_SMP */
  
  #ifdef CONFIG_PROC_FS
  static void *s_start(struct seq_file *m, loff_t *pos)
  	__acquires(&vmap_purge_lock)
  	__acquires(&vmap_area_lock)
  {
  	mutex_lock(&vmap_purge_lock);
  	spin_lock(&vmap_area_lock);

  	return seq_list_start(&vmap_area_list, *pos);
  }
  
  static void *s_next(struct seq_file *m, void *p, loff_t *pos)
  {
  	return seq_list_next(p, &vmap_area_list, pos);
  }
  
  static void s_stop(struct seq_file *m, void *p)
  	__releases(&vmap_area_lock)
  	__releases(&vmap_purge_lock)
  {
  	spin_unlock(&vmap_area_lock);
  	mutex_unlock(&vmap_purge_lock);
  }
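
  /*
   * Editor's note (illustrative, not in the original source): the walk
   * holds vmap_purge_lock and then vmap_area_lock for its whole
   * duration, releasing them in reverse order in s_stop(); holding the
   * purge mutex keeps the purge list stable for show_purge_info().
   */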

  static void show_numa_info(struct seq_file *m, struct vm_struct *v)
  {
  	if (IS_ENABLED(CONFIG_NUMA)) {
  		unsigned int nr, *counters = m->private;
  
  		if (!counters)
  			return;
  		if (v->flags & VM_UNINITIALIZED)
  			return;
  		/* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
  		smp_rmb();

  		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
  
  		for (nr = 0; nr < v->nr_pages; nr++)
  			counters[page_to_nid(v->pages[nr])]++;
  
  		for_each_node_state(nr, N_HIGH_MEMORY)
  			if (counters[nr])
  				seq_printf(m, " N%u=%u", nr, counters[nr]);
  	}
  }
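
  /*
   * Editor's illustrative sketch (not part of the original source): for
   * a vm_struct whose pages live on two NUMA nodes, show_numa_info()
   * appends something like
   *
   *	 N0=3 N1=1
   *
   * to the current line; the node ids and counts here are made up.
   */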

  static void show_purge_info(struct seq_file *m)
  {
  	struct llist_node *head;
  	struct vmap_area *va;
  
  	head = READ_ONCE(vmap_purge_list.first);
  	if (head == NULL)
  		return;
  
  	llist_for_each_entry(va, head, purge_list) {
  		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area
  ",
  			(void *)va->va_start, (void *)va->va_end,
  			va->va_end - va->va_start);
  	}
  }
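
  /*
   * Editor's illustrative sketch (not part of the original source): each
   * entry printed above looks roughly like
   *
   *	0xffffc90000101000-0xffffc90000103000    8192 unpurged vm_area
   *
   * with made-up addresses; "%7ld" right-aligns the size in bytes.
   */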

  static int s_show(struct seq_file *m, void *p)
  {
  	struct vmap_area *va;
  	struct vm_struct *v;

  	va = list_entry(p, struct vmap_area, list);

  	/*
  	 * s_show can encounter a race with remove_vm_area(): !vm means
  	 * the vmap area is being torn down or is a vm_map_ram allocation.
  	 */
  	if (!va->vm) {
  		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram
  ",
  			(void *)va->va_start, (void *)va->va_end,
  			va->va_end - va->va_start);

  		return 0;
  	}
  
  	v = va->vm;

  	seq_printf(m, "0x%pK-0x%pK %7ld",
  		v->addr, v->addr + v->size, v->size);

  	if (v->caller)
  		seq_printf(m, " %pS", v->caller);

  	if (v->nr_pages)
  		seq_printf(m, " pages=%d", v->nr_pages);
  
  	if (v->phys_addr)
  		seq_printf(m, " phys=%pa", &v->phys_addr);
  
  	if (v->flags & VM_IOREMAP)
  		seq_puts(m, " ioremap");
  
  	if (v->flags & VM_ALLOC)
  		seq_puts(m, " vmalloc");
  
  	if (v->flags & VM_MAP)
  		seq_puts(m, " vmap");
  
  	if (v->flags & VM_USERMAP)
  		seq_puts(m, " user");

  	if (v->flags & VM_DMA_COHERENT)
  		seq_puts(m, " dma-coherent");

  	if (is_vmalloc_addr(v->pages))
  		seq_puts(m, " vpages");

  	show_numa_info(m, v);
  	seq_putc(m, '\n');
  
  	/*
  	 * As a final step, dump the "unpurged" areas. Note that the
  	 * entire "/proc/vmallocinfo" output will then not be address
  	 * sorted, because the purge list is not sorted.
  	 */
  	if (list_is_last(&va->list, &vmap_area_list))
  		show_purge_info(m);
  	return 0;
  }
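
  /*
   * Editor's illustrative sketch (not part of the original source): a
   * fully decorated /proc/vmallocinfo line built by s_show() could read
   *
   *	0xffffc90000035000-0xffffc9000003a000   20480 some_caller+0x42/0x80 pages=4 vmalloc N0=4
   *
   * where the range, caller and counts are invented; "%pK" may print
   * zeroed addresses for unprivileged readers, depending on
   * kptr_restrict.
   */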

  static const struct seq_operations vmalloc_op = {
  	.start = s_start,
  	.next = s_next,
  	.stop = s_stop,
  	.show = s_show,
  };

  static int __init proc_vmalloc_init(void)
  {
  	if (IS_ENABLED(CONFIG_NUMA))
  		proc_create_seq_private("vmallocinfo", 0400, NULL,
  				&vmalloc_op,
  				nr_node_ids * sizeof(unsigned int), NULL);
  	else
  		proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
  	return 0;
  }
  module_init(proc_vmalloc_init);
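
  /*
   * Editor's note (illustrative, not in the original source): with
   * CONFIG_NUMA the seq_file private buffer sized above,
   * nr_node_ids * sizeof(unsigned int), is the per-node counter array
   * that show_numa_info() fetches via m->private and clears with
   * memset() for every vm_struct it prints.
   */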

  #endif