mm/sparse-vmemmap.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   * Virtual Memory Map support
   *
   * (C) 2007 sgi. Christoph Lameter.
   *
   * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
   * virt_to_page, page_address() to be implemented as a base offset
   * calculation without memory access.
   *
   * However, virtual mappings need a page table and TLBs. Many Linux
   * architectures already map their physical space using 1-1 mappings
   * via TLBs. For those arches the virtual memory map is essentially
   * for free if we use the same page size as the 1-1 mappings. In that
   * case the overhead consists of a few additional pages that are
   * allocated to create a view of memory for vmemmap.
   *
   * The architecture is expected to provide a vmemmap_populate() function
   * to instantiate the mapping.
   */
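  /*
   * As a rough sketch (not the authoritative definitions, which live in the
   * architecture headers and include/asm-generic/memory_model.h): with a
   * virtually contiguous memmap based at the architecture's 'vmemmap' address,
   * the primitives above reduce to pointer arithmetic along the lines of
   *
   *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
   *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
   */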
  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/memblock.h>
  #include <linux/memremap.h>
  #include <linux/highmem.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
  #include <linux/vmalloc.h>
  #include <linux/sched.h>
  #include <linux/pgtable.h>
  #include <linux/bootmem_info.h>
  #include <asm/dma.h>
  #include <asm/pgalloc.h>
  #include <asm/tlbflush.h>
  
  /**
   * struct vmemmap_remap_walk - walk vmemmap page table
   *
   * @remap_pte:		called for each lowest-level entry (PTE).
   * @nr_walked:		the number of PTEs walked.
   * @reuse_page:		the page which is reused for the tail vmemmap pages.
   * @reuse_addr:		the virtual address of the @reuse_page page.
   * @vmemmap_pages:	the list head of the vmemmap pages that can be freed
   *			or that the range is remapped to.
   */
  struct vmemmap_remap_walk {
  	void (*remap_pte)(pte_t *pte, unsigned long addr,
  			  struct vmemmap_remap_walk *walk);
  	unsigned long nr_walked;
  	struct page *reuse_page;
  	unsigned long reuse_addr;
  	struct list_head *vmemmap_pages;
  };
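  /*
   * A walk is driven by filling in this struct and handing it to
   * vmemmap_remap_range(). As a minimal sketch (mirroring what
   * vmemmap_remap_free() below does for the "remap and free" case):
   *
   *	LIST_HEAD(vmemmap_pages);
   *	struct vmemmap_remap_walk walk = {
   *		.remap_pte	= vmemmap_remap_pte,
   *		.reuse_addr	= reuse,
   *		.vmemmap_pages	= &vmemmap_pages,
   *	};
   *	vmemmap_remap_range(reuse, end, &walk);
   */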
  static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
  				  struct vmemmap_remap_walk *walk)
  {
  	pmd_t __pmd;
  	int i;
  	unsigned long addr = start;
  	struct page *page = pmd_page(*pmd);
  	pte_t *pgtable = pte_alloc_one_kernel(&init_mm);
  
  	if (!pgtable)
  		return -ENOMEM;
  
  	pmd_populate_kernel(&init_mm, &__pmd, pgtable);
  
  	for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, addr += PAGE_SIZE) {
  		pte_t entry, *pte;
  		pgprot_t pgprot = PAGE_KERNEL;
  
  		entry = mk_pte(page + i, pgprot);
  		pte = pte_offset_kernel(&__pmd, addr);
  		set_pte_at(&init_mm, addr, pte, entry);
  	}
  
  	/* Make pte visible before pmd. See comment in __pte_alloc(). */
  	smp_wmb();
  	pmd_populate_kernel(&init_mm, pmd, pgtable);
  
  	flush_tlb_kernel_range(start, start + PMD_SIZE);
  
  	return 0;
  }
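  /*
   * Note: the split above replaces one PMD-sized vmemmap mapping with
   * PMD_SIZE / PAGE_SIZE base-page PTEs that still point at the same physical
   * pages, so the memmap contents are unchanged; only the mapping granularity
   * drops, which is what later allows individual vmemmap pages in the range
   * to be remapped or freed.
   */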
  static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
  			      unsigned long end,
  			      struct vmemmap_remap_walk *walk)
  {
  	pte_t *pte = pte_offset_kernel(pmd, addr);
  
  	/*
  	 * The reuse_page is found 'first' in the page table walk, before we
  	 * start remapping (i.e. before @walk->remap_pte is called).
  	 */
  	if (!walk->reuse_page) {
  		walk->reuse_page = pte_page(*pte);
  		/*
  		 * Because the reuse address is part of the range that we are
  		 * walking, skip the reuse address range.
  		 */
  		addr += PAGE_SIZE;
  		pte++;
  		walk->nr_walked++;
  	}
  	for (; addr != end; addr += PAGE_SIZE, pte++) {
  		walk->remap_pte(pte, addr, walk);
  		walk->nr_walked++;
  	}
  }
  static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
  			     unsigned long end,
  			     struct vmemmap_remap_walk *walk)
  {
  	pmd_t *pmd;
  	unsigned long next;
  
  	pmd = pmd_offset(pud, addr);
  	do {
  		if (pmd_leaf(*pmd)) {
  			int ret;

  			ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK, walk);
  			if (ret)
  				return ret;
  		}
  		next = pmd_addr_end(addr, end);
  		vmemmap_pte_range(pmd, addr, next, walk);
  	} while (pmd++, addr = next, addr != end);
  
  	return 0;
  }
  static int vmemmap_pud_range(p4d_t *p4d, unsigned long addr,
  			     unsigned long end,
  			     struct vmemmap_remap_walk *walk)
  {
  	pud_t *pud;
  	unsigned long next;
  
  	pud = pud_offset(p4d, addr);
  	do {
  		int ret;
  		next = pud_addr_end(addr, end);
  		ret = vmemmap_pmd_range(pud, addr, next, walk);
  		if (ret)
  			return ret;
  	} while (pud++, addr = next, addr != end);
  
  	return 0;
  }
  static int vmemmap_p4d_range(pgd_t *pgd, unsigned long addr,
  			     unsigned long end,
  			     struct vmemmap_remap_walk *walk)
  {
  	p4d_t *p4d;
  	unsigned long next;
  
  	p4d = p4d_offset(pgd, addr);
  	do {
  		int ret;
  		next = p4d_addr_end(addr, end);
  		ret = vmemmap_pud_range(p4d, addr, next, walk);
  		if (ret)
  			return ret;
  	} while (p4d++, addr = next, addr != end);
  
  	return 0;
  }
  static int vmemmap_remap_range(unsigned long start, unsigned long end,
  			       struct vmemmap_remap_walk *walk)
  {
  	unsigned long addr = start;
  	unsigned long next;
  	pgd_t *pgd;
  
  	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
  	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
  
  	pgd = pgd_offset_k(addr);
  	do {
  		int ret;
  		next = pgd_addr_end(addr, end);
  		ret = vmemmap_p4d_range(pgd, addr, next, walk);
  		if (ret)
  			return ret;
  	} while (pgd++, addr = next, addr != end);
  
  	/*
  	 * We only change the mapping of the vmemmap virtual address range
  	 * [@start + PAGE_SIZE, @end), so we only need to flush the TLB
  	 * entries which belong to that range.
  	 */
  	flush_tlb_kernel_range(start + PAGE_SIZE, end);
  
  	return 0;
  }
  
  /*
   * Free a vmemmap page. A vmemmap page can be allocated from the memblock
   * allocator or the buddy allocator. If the PG_reserved flag is set, it means
   * that it was allocated from the memblock allocator, so free it via
   * free_bootmem_page(). Otherwise, use __free_page().
   */
  static inline void free_vmemmap_page(struct page *page)
  {
  	if (PageReserved(page))
  		free_bootmem_page(page);
  	else
  		__free_page(page);
  }
  
  /* Free a list of the vmemmap pages */
  static void free_vmemmap_page_list(struct list_head *list)
  {
  	struct page *page, *next;
  
  	list_for_each_entry_safe(page, next, list, lru) {
  		list_del(&page->lru);
  		free_vmemmap_page(page);
  	}
  }
  
  static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
  			      struct vmemmap_remap_walk *walk)
  {
  	/*
  	 * Remap the tail pages as read-only to catch illegal write operations
  	 * to the tail pages.
  	 */
  	pgprot_t pgprot = PAGE_KERNEL_RO;
  	pte_t entry = mk_pte(walk->reuse_page, pgprot);
  	struct page *page = pte_page(*pte);
  	list_add_tail(&page->lru, walk->vmemmap_pages);
  	set_pte_at(&init_mm, addr, pte, entry);
  }
  static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
  				struct vmemmap_remap_walk *walk)
  {
  	pgprot_t pgprot = PAGE_KERNEL;
  	struct page *page;
  	void *to;
  
  	BUG_ON(pte_page(*pte) != walk->reuse_page);
  
  	page = list_first_entry(walk->vmemmap_pages, struct page, lru);
  	list_del(&page->lru);
  	to = page_to_virt(page);
  	copy_page(to, (void *)walk->reuse_addr);
  
  	set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
  }
  /**
   * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end)
   *			to the page which @reuse is mapped to, then free the
   *			vmemmap pages which the range was mapped to.
   * @start:	start address of the vmemmap virtual address range that we want
   *		to remap.
   * @end:	end address of the vmemmap virtual address range that we want to
   *		remap.
   * @reuse:	reuse address.
   *
   * Return: %0 on success, negative error code otherwise.
   */
  int vmemmap_remap_free(unsigned long start, unsigned long end,
  		       unsigned long reuse)
  {
  	int ret;
  	LIST_HEAD(vmemmap_pages);
  	struct vmemmap_remap_walk walk = {
  		.remap_pte	= vmemmap_remap_pte,
  		.reuse_addr	= reuse,
  		.vmemmap_pages	= &vmemmap_pages,
  	};
  
  	/*
  	 * In order to make the remapping routine as efficient as possible for
  	 * huge pages, the vmemmap page table walk obeys the following rules
  	 * (see vmemmap_pte_range() for more details):
  	 *
  	 * - The range [@start, @end) and the range [@reuse, @reuse + PAGE_SIZE)
  	 *   must be contiguous.
  	 * - The @reuse address is part of the range [@reuse, @end) that we are
  	 *   walking, which is the range passed to vmemmap_remap_range().
  	 * - The @reuse address is the first address in the complete range.
  	 *
  	 * So we need to make sure that @start and @reuse meet the above rules.
  	 */
  	BUG_ON(start - reuse != PAGE_SIZE);
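  	/*
  	 * With the rules above the walked range looks roughly like this
  	 * (illustration only):
  	 *
  	 *	reuse              start                                  end
  	 *	|<--- PAGE_SIZE --->|<--- vmemmap pages to be remapped --->|
  	 *
  	 * i.e. the reuse page is the page immediately before @start and is
  	 * the first page visited by the walk over [@reuse, @end).
  	 */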
  	mmap_write_lock(&init_mm);
  	ret = vmemmap_remap_range(reuse, end, &walk);
  	mmap_write_downgrade(&init_mm);

  	if (ret && walk.nr_walked) {
  		end = reuse + walk.nr_walked * PAGE_SIZE;
  		/*
  		 * vmemmap_pages contains pages from the previous
  		 * vmemmap_remap_range call which failed.  These
  		 * are pages which were removed from the vmemmap.
  		 * They will be restored in the following call.
  		 */
  		walk = (struct vmemmap_remap_walk) {
  			.remap_pte	= vmemmap_restore_pte,
  			.reuse_addr	= reuse,
  			.vmemmap_pages	= &vmemmap_pages,
  		};

  		vmemmap_remap_range(reuse, end, &walk);
  	}
  	mmap_read_unlock(&init_mm);

  	free_vmemmap_page_list(&vmemmap_pages);

  	return ret;
  }
  
  static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
  				   gfp_t gfp_mask, struct list_head *list)
  {
  	unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
  	int nid = page_to_nid((struct page *)start);
  	struct page *page, *next;
  
  	while (nr_pages--) {
  		page = alloc_pages_node(nid, gfp_mask, 0);
  		if (!page)
  			goto out;
  		list_add_tail(&page->lru, list);
  	}
  
  	return 0;
  out:
  	list_for_each_entry_safe(page, next, list, lru)
  		__free_pages(page, 0);
  	return -ENOMEM;
  }
  
  /**
   * vmemmap_remap_alloc - remap the vmemmap virtual address range [@start, @end)
   *			 to pages which are newly allocated for that range,
   *			 one page for each vmemmap page being remapped.
   * @start:	start address of the vmemmap virtual address range that we want
   *		to remap.
   * @end:	end address of the vmemmap virtual address range that we want to
   *		remap.
   * @reuse:	reuse address.
   * @gfp_mask:	GFP flag for allocating vmemmap pages.
   *
   * Return: %0 on success, negative error code otherwise.
   */
  int vmemmap_remap_alloc(unsigned long start, unsigned long end,
  			unsigned long reuse, gfp_t gfp_mask)
  {
  	LIST_HEAD(vmemmap_pages);
  	struct vmemmap_remap_walk walk = {
  		.remap_pte	= vmemmap_restore_pte,
  		.reuse_addr	= reuse,
  		.vmemmap_pages	= &vmemmap_pages,
  	};
  
  	/* See the comment in the vmemmap_remap_free(). */
  	BUG_ON(start - reuse != PAGE_SIZE);
  	if (alloc_vmemmap_page_list(start, end, gfp_mask, &vmemmap_pages))
  		return -ENOMEM;
  	mmap_read_lock(&init_mm);
  	vmemmap_remap_range(reuse, end, &walk);
  	mmap_read_unlock(&init_mm);
  
  	return 0;
  }
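  /*
   * Note: this is the inverse of vmemmap_remap_free() above. A caller that
   * previously freed the vmemmap backing a range is expected to restore it
   * with the same @start/@end/@reuse triple before the struct pages in that
   * range need to be written to again.
   */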
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
   * Uses the main allocators if they are available, else bootmem.
   */

  static void * __ref __earlyonly_bootmem_alloc(int node,
  				unsigned long size,
  				unsigned long align,
  				unsigned long goal)
  {
  	return memblock_alloc_try_nid_raw(size, align, goal,
  					       MEMBLOCK_ALLOC_ACCESSIBLE, node);
  }
  void * __meminit vmemmap_alloc_block(unsigned long size, int node)
  {
  	/* If the main allocator is up, use that; otherwise fall back to bootmem. */
  	if (slab_is_available()) {
  		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
  		int order = get_order(size);
  		static bool warned;
  		struct page *page;
  		page = alloc_pages_node(node, gfp_mask, order);
  		if (page)
  			return page_address(page);
  
  		if (!warned) {
  			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
  				   "vmemmap alloc failure: order:%u", order);
  			warned = true;
  		}
  		return NULL;
  	} else
  		return __earlyonly_bootmem_alloc(node, size, size,
  				__pa(MAX_DMA_ADDRESS));
  }
  static void * __meminit altmap_alloc_block_buf(unsigned long size,
  					       struct vmem_altmap *altmap);
  /* Need to make sure the size is always the same during the early stage. */
  void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
  					 struct vmem_altmap *altmap)
  {
  	void *ptr;
  
  	if (altmap)
  		return altmap_alloc_block_buf(size, altmap);

  	ptr = sparse_buffer_alloc(size);
  	if (!ptr)
  		ptr = vmemmap_alloc_block(size, node);
  	return ptr;
  }
  static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
  {
  	return altmap->base_pfn + altmap->reserve + altmap->alloc
  		+ altmap->align;
  }
  
  static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
  {
  	unsigned long allocated = altmap->alloc + altmap->align;
  
  	if (altmap->free > allocated)
  		return altmap->free - allocated;
  	return 0;
  }
  static void * __meminit altmap_alloc_block_buf(unsigned long size,
  					       struct vmem_altmap *altmap)
  {
  	unsigned long pfn, nr_pfns, nr_align;
  
  	if (size & ~PAGE_MASK) {
  		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
  				__func__, size);
  		return NULL;
  	}
  	pfn = vmem_altmap_next_pfn(altmap);
  	nr_pfns = size >> PAGE_SHIFT;
  	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
  	nr_align = ALIGN(pfn, nr_align) - pfn;
  	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
  		return NULL;
  
  	altmap->alloc += nr_pfns;
  	altmap->align += nr_align;
  	pfn += nr_align;
  	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
  			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
  	return __va(__pfn_to_phys(pfn));
  }
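  /*
   * Worked example (with assumed numbers): a request for 2MB of vmemmap with
   * 4K pages gives nr_pfns = 512. find_first_bit() on 512 (only bit 9 set)
   * yields nr_align = 512, so the allocation is aligned up to the next
   * 512-pfn boundary inside the device reservation before 512 pfns are
   * accounted to altmap->alloc.
   */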
  void __meminit vmemmap_verify(pte_t *pte, int node,
  				unsigned long start, unsigned long end)
  {
  	unsigned long pfn = pte_pfn(*pte);
  	int actual_node = early_pfn_to_nid(pfn);
  	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
  		pr_warn("[%lx-%lx] potential offnode page_structs\n",
  			start, end - 1);
  }
  pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
  				       struct vmem_altmap *altmap)
  {
  	pte_t *pte = pte_offset_kernel(pmd, addr);
  	if (pte_none(*pte)) {
  		pte_t entry;
  		void *p;
  		p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
  		if (!p)
  			return NULL;
  		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
  		set_pte_at(&init_mm, addr, pte, entry);
  	}
  	return pte;
  }
  static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
  {
  	void *p = vmemmap_alloc_block(size, node);
  
  	if (!p)
  		return NULL;
  	memset(p, 0, size);
  
  	return p;
  }
  pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
  {
  	pmd_t *pmd = pmd_offset(pud, addr);
  	if (pmd_none(*pmd)) {
  		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
  		if (!p)
  			return NULL;
  		pmd_populate_kernel(&init_mm, pmd, p);
  	}
  	return pmd;
  }

  pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
  {
  	pud_t *pud = pud_offset(p4d, addr);
  	if (pud_none(*pud)) {
  		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
  		if (!p)
  			return NULL;
  		pud_populate(&init_mm, pud, p);
  	}
  	return pud;
  }

  p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
  {
  	p4d_t *p4d = p4d_offset(pgd, addr);
  	if (p4d_none(*p4d)) {
  		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
  		if (!p)
  			return NULL;
  		p4d_populate(&init_mm, p4d, p);
  	}
  	return p4d;
  }
  pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
  {
  	pgd_t *pgd = pgd_offset_k(addr);
  	if (pgd_none(*pgd)) {
  		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
  		if (!p)
  			return NULL;
  		pgd_populate(&init_mm, pgd, p);
  	}
  	return pgd;
  }
  int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
  					 int node, struct vmem_altmap *altmap)
  {
  	unsigned long addr = start;
  	pgd_t *pgd;
  	p4d_t *p4d;
  	pud_t *pud;
  	pmd_t *pmd;
  	pte_t *pte;

  	for (; addr < end; addr += PAGE_SIZE) {
  		pgd = vmemmap_pgd_populate(addr, node);
  		if (!pgd)
  			return -ENOMEM;
  		p4d = vmemmap_p4d_populate(pgd, addr, node);
  		if (!p4d)
  			return -ENOMEM;
  		pud = vmemmap_pud_populate(p4d, addr, node);
  		if (!pud)
  			return -ENOMEM;
  		pmd = vmemmap_pmd_populate(pud, addr, node);
  		if (!pmd)
  			return -ENOMEM;
  		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
  		if (!pte)
  			return -ENOMEM;
  		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
  	}
  
  	return 0;
  }
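  /*
   * For an architecture that maps the vmemmap with base pages only, an
   * illustrative (not prescriptive) vmemmap_populate() can be little more
   * than a call to the helper above:
   *
   *	int __meminit vmemmap_populate(unsigned long start, unsigned long end,
   *				       int node, struct vmem_altmap *altmap)
   *	{
   *		return vmemmap_populate_basepages(start, end, node, altmap);
   *	}
   */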

  struct page * __meminit __populate_section_memmap(unsigned long pfn,
  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
  {
  	unsigned long start = (unsigned long) pfn_to_page(pfn);
  	unsigned long end = start + nr_pages * sizeof(struct page);
  
  	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
  		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
  		return NULL;

  	if (vmemmap_populate(start, end, nid, altmap))
  		return NULL;
  	return pfn_to_page(pfn);
  }