mm/sparse.c

// SPDX-License-Identifier: GPL-2.0
/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>

#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
int page_to_nid(const struct page *page)
{
	return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

static void set_section_nid(unsigned long section_nr, int nid)
{
	section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
static noinline struct mem_section __ref *sparse_index_alloc(int nid)
  {
  	struct mem_section *section = NULL;
  	unsigned long array_size = SECTIONS_PER_ROOT *
  				   sizeof(struct mem_section);
	if (slab_is_available()) {
		section = kzalloc_node(array_size, GFP_KERNEL, nid);
	} else {
		section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
					      nid);
		if (!section)
			panic("%s: Failed to allocate %lu bytes nid=%d\n",
			      __func__, array_size, nid);
	}

	return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
	struct mem_section *section;

  	/*
  	 * An existing section is possible in the sub-section hotplug
  	 * case. First hot-add instantiates, follow-on hot-add reuses
  	 * the existing section.
  	 *
  	 * The mem_hotplug_lock resolves the apparent race below.
  	 */
	if (mem_section[root])
		return 0;

	section = sparse_index_alloc(nid);
	if (!section)
		return -ENOMEM;

	mem_section[root] = section;

	return 0;
  }
  #else /* !SPARSEMEM_EXTREME */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  	return 0;
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
unsigned long __section_nr(struct mem_section *ms)
{
	unsigned long root_nr;
	struct mem_section *root = NULL;

	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
		if (!root)
			continue;

		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
			break;
	}
	VM_BUG_ON(!root);

	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
unsigned long __section_nr(struct mem_section *ms)
{
	return (unsigned long)(ms - mem_section[0]);
  }
  #endif

  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	return (nid << SECTION_NID_SHIFT);
  }
  
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	return (section->section_mem_map >> SECTION_NID_SHIFT);
  }
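
/*
 * Worked example (illustrative only; the actual shift is whatever
 * SECTION_NID_SHIFT is defined to in linux/mmzone.h): if the shift were 3,
 * memory_present() on node 2 would store 2 << 3 = 0x10 in
 * section_mem_map, leaving the low bits free for flag bits such as
 * SECTION_MARKED_PRESENT and SECTION_IS_ONLINE.  sparse_early_nid()
 * recovers the node by shifting back down, and the temporary encoding is
 * replaced once sparse_init_one_section() installs the real mem_map.
 */
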
  /* Validate the physical addressing limitations of the model */
  void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  						unsigned long *end_pfn)
{
	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

	/*
	 * Sanity checks - do not allow an architecture to pass
	 * in larger pfns than the maximum scope of sparsemem:
	 */
	if (*start_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*start_pfn = max_sparsemem_pfn;
		*end_pfn = max_sparsemem_pfn;
	} else if (*end_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*end_pfn = max_sparsemem_pfn;
	}
}
  /*
   * There are a number of times that we loop over NR_MEM_SECTIONS,
   * looking for section_present() on each.  But, when we have very
   * large physical address spaces, NR_MEM_SECTIONS can also be
   * very large which makes the loops quite long.
   *
   * Keeping track of this gives us an easy way to break out of
   * those loops early.
   */
unsigned long __highest_present_section_nr;
static void section_mark_present(struct mem_section *ms)
{
	unsigned long section_nr = __section_nr(ms);

	if (section_nr > __highest_present_section_nr)
		__highest_present_section_nr = section_nr;

	ms->section_mem_map |= SECTION_MARKED_PRESENT;
}

static inline unsigned long next_present_section_nr(unsigned long section_nr)
{
	do {
		section_nr++;
		if (present_section_nr(section_nr))
			return section_nr;
	} while ((section_nr <= __highest_present_section_nr));

	return -1;
}

#define for_each_present_section_nr(start, section_nr)		\
	for (section_nr = next_present_section_nr(start-1);	\
	     ((section_nr != -1) &&				\
	      (section_nr <= __highest_present_section_nr));	\
	     section_nr = next_present_section_nr(section_nr))

  static inline unsigned long first_present_section_nr(void)
  {
  	return next_present_section_nr(-1);
  }
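
/*
 * Usage note: first_present_section_nr() passes -1, and the unsigned
 * increment in next_present_section_nr() wraps it to section 0, so the
 * scan starts from the beginning.  sparse_init() below then walks present
 * sections with for_each_present_section_nr(pnum_begin + 1, pnum_end),
 * which stops at __highest_present_section_nr instead of iterating all
 * the way to NR_MEM_SECTIONS.
 */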

static void subsection_mask_set(unsigned long *map, unsigned long pfn,
  		unsigned long nr_pages)
  {
  	int idx = subsection_map_index(pfn);
  	int end = subsection_map_index(pfn + nr_pages - 1);
  
  	bitmap_set(map, idx, end - idx + 1);
  }
  
  void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
  {
  	int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
	unsigned long nr, start_sec = pfn_to_section_nr(pfn);

	if (!nr_pages)
		return;

	for (nr = start_sec; nr <= end_sec; nr++) {
		struct mem_section *ms;
		unsigned long pfns;

		pfns = min(nr_pages, PAGES_PER_SECTION
				- (pfn & ~PAGE_SECTION_MASK));
		ms = __nr_to_section(nr);
		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);

		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
  				pfns, subsection_map_index(pfn),
  				subsection_map_index(pfn + pfns - 1));
  
  		pfn += pfns;
  		nr_pages -= pfns;
  	}
  }
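
/*
 * Example (sizes are per-architecture; the figures assume x86-64, where a
 * section is 128 MiB and a subsection is 2 MiB, i.e. 64 subsections per
 * section): registering a 2 MiB-aligned, 4 MiB range marks two
 * consecutive bits in that section's subsection_map, via
 * bitmap_set(map, idx, end - idx + 1) in subsection_mask_set() above.
 */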

/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
	unsigned long pfn;

#ifdef CONFIG_SPARSEMEM_EXTREME
	if (unlikely(!mem_section)) {
		unsigned long size, align;

		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
		align = 1 << (INTERNODE_CACHE_SHIFT);
		mem_section = memblock_alloc(size, align);
		if (!mem_section)
			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
			      __func__, size, align);
	}
#endif

	start &= PAGE_SECTION_MASK;
	mminit_validate_memmodel_limits(&start, &end);
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
		unsigned long section = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		sparse_index_init(section, nid);
		set_section_nid(section, nid);

		ms = __nr_to_section(section);
		if (!ms->section_mem_map) {
			ms->section_mem_map = sparse_encode_early_nid(nid) |
							SECTION_IS_ONLINE;
			section_mark_present(ms);
		}
  	}
  }
  
  /*
   * Mark all memblocks as present using memory_present(). This is a
 * convenience function that is useful for a number of arches
 * to mark all of the system's memory as present during initialization.
   */
  void __init memblocks_present(void)
  {
  	struct memblock_region *reg;
  
  	for_each_memblock(memory, reg) {
  		memory_present(memblock_get_region_node(reg),
  			       memblock_region_memory_base_pfn(reg),
  			       memblock_region_memory_end_pfn(reg));
  	}
  }
  
  /*
   * Subtle, we encode the real pfn into the mem_map such that
   * the identity pfn - section_mem_map will return the actual
   * physical page frame number.
   */
  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
  {
	unsigned long coded_mem_map =
		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
	return coded_mem_map;
  }
  
  /*
 * Decode mem_map from the coded memmap
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
	/* mask off the extra low bits of information */
	coded_mem_map &= SECTION_MAP_MASK;
	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
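
/*
 * Worked example of the encoding: if a section's first pfn is S and its
 * struct page array starts at mem_map, the value stored is mem_map - S
 * (in units of struct page).  For any pfn inside the section, its page is
 * then simply coded_mem_map + pfn, so no per-lookup subtraction is
 * needed -- this is the "identity" mentioned above
 * sparse_encode_mem_map().
 */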
  static void __meminit sparse_init_one_section(struct mem_section *ms,
		unsigned long pnum, struct page *mem_map,
		struct mem_section_usage *usage, unsigned long flags)
{
	ms->section_mem_map &= ~SECTION_MAP_MASK;
	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
		| SECTION_HAS_MEM_MAP | flags;
	ms->usage = usage;
}

static unsigned long usemap_size(void)
{
	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}

size_t mem_section_usage_size(void)
{
	return sizeof(struct mem_section_usage) + usemap_size();
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	struct mem_section_usage *usage;
	unsigned long goal, limit;
	int nid;
	/*
	 * A page may contain usemaps for other sections preventing the
	 * page being freed and making a section unremovable while
	 * other sections referencing the usemap remain active. Similarly,
	 * a pgdat can prevent a section being removed. If section A
	 * contains a pgdat and section B contains the usemap, both
	 * sections become inter-dependent. This allocates usemaps
	 * from the same section as the pgdat where possible to avoid
	 * this problem.
	 */
	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
	limit = goal + (1UL << PA_SECTION_SHIFT);
	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
	usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
	if (!usage && limit) {
		limit = 0;
		goto again;
	}
	return usage;
}

static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
	unsigned long usemap_snr, pgdat_snr;
	static unsigned long old_usemap_snr;
	static unsigned long old_pgdat_snr;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int usemap_nid;

	/* First call */
	if (!old_usemap_snr) {
		old_usemap_snr = NR_MEM_SECTIONS;
		old_pgdat_snr = NR_MEM_SECTIONS;
	}

  	usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	if (usemap_snr == pgdat_snr)
  		return;
  
  	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
  		/* skip redundant message */
  		return;
  
  	old_usemap_snr = usemap_snr;
  	old_pgdat_snr = pgdat_snr;
  
  	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
  	if (usemap_nid != nid) {
		pr_info("node %d must be removed before remove section %ld\n",
			nid, usemap_snr);
  		return;
  	}
  	/*
  	 * There is a circular dependency.
  	 * Some platforms allow un-removable section because they will just
  	 * gather other removable sections for dynamic partitioning.
  	 * Just notify un-removable section's number here.
  	 */
	pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
		usemap_snr, pgdat_snr, nid);
  }
  #else
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
}

static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static unsigned long __init section_map_size(void)
  {
  	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
  }
  
  #else
static unsigned long __init section_map_size(void)
{
	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}

  struct page __init *__populate_section_memmap(unsigned long pfn,
  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long size = section_map_size();
	struct page *map = sparse_buffer_alloc(size);
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	if (map)
		return map;

	map = memblock_alloc_try_nid(size,
					  PAGE_SIZE, addr,
					  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	if (!map)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
		      __func__, size, PAGE_SIZE, nid, &addr);

  	return map;
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */

static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;

  static inline void __meminit sparse_buffer_free(unsigned long size)
  {
  	WARN_ON(!sparsemap_buf || size == 0);
  	memblock_free_early(__pa(sparsemap_buf), size);
  }

static void __init sparse_buffer_init(unsigned long size, int nid)
{
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
	sparsemap_buf =
		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
						addr,
						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
  	sparsemap_buf_end = sparsemap_buf + size;
  }

static void __init sparse_buffer_fini(void)
{
	unsigned long size = sparsemap_buf_end - sparsemap_buf;

	if (sparsemap_buf && size > 0)
		sparse_buffer_free(size);
  	sparsemap_buf = NULL;
  }
  
  void * __meminit sparse_buffer_alloc(unsigned long size)
  {
  	void *ptr = NULL;
  
  	if (sparsemap_buf) {
		ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
		if (ptr + size > sparsemap_buf_end)
			ptr = NULL;
		else {
			/* Free redundant aligned space */
			if ((unsigned long)(ptr - sparsemap_buf) > 0)
				sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
			sparsemap_buf = ptr + size;
		}
  	}
  	return ptr;
  }
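
/*
 * Note on the helpers above: sparsemap_buf is a simple bump allocator over
 * one large per-node chunk reserved by sparse_buffer_init().  Allocations
 * are rounded up to their own size (a PMD_SIZE-aligned memmap size in the
 * vmemmap case), the alignment padding is returned to memblock through
 * sparse_buffer_free(), and a NULL return makes the caller fall back to a
 * direct memblock allocation (see __populate_section_memmap() above).
 */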

void __weak __meminit vmemmap_populate_print_last(void)
{
}

  /*
   * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
 * and the number of present sections in this node is map_count.
   */
  static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
  				   unsigned long pnum_end,
  				   unsigned long map_count)
  {
	struct mem_section_usage *usage;
	unsigned long pnum;
	struct page *map;

	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
			mem_section_usage_size() * map_count);
	if (!usage) {
  		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
  		goto failed;
  	}
  	sparse_buffer_init(map_count * section_map_size(), nid);
  	for_each_present_section_nr(pnum_begin, pnum) {
		unsigned long pfn = section_nr_to_pfn(pnum);

		if (pnum >= pnum_end)
			break;
  		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
  				nid, NULL);
  		if (!map) {
  			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
  			       __func__, nid);
  			pnum_begin = pnum;
  			goto failed;
  		}
		check_usemap_section_nr(nid, usage);
		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
				SECTION_IS_EARLY);
  		usage = (void *) usage + mem_section_usage_size();
  	}
  	sparse_buffer_fini();
  	return;
  failed:
  	/* We failed to allocate, mark all the following pnums as not present */
  	for_each_present_section_nr(pnum_begin, pnum) {
  		struct mem_section *ms;
  
  		if (pnum >= pnum_end)
  			break;
  		ms = __nr_to_section(pnum);
  		ms->section_mem_map = 0;
  	}
  }
  
  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   */
void __init sparse_init(void)
  {
  	unsigned long pnum_begin = first_present_section_nr();
  	int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
  	unsigned long pnum_end, map_count = 1;
  
  	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
  	set_pageblock_order();
  
  	for_each_present_section_nr(pnum_begin + 1, pnum_end) {
  		int nid = sparse_early_nid(__nr_to_section(pnum_end));
  
  		if (nid == nid_begin) {
  			map_count++;
  			continue;
  		}
  		/* Init node with sections in range [pnum_begin, pnum_end) */
  		sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
  		nid_begin = nid;
  		pnum_begin = pnum_end;
  		map_count = 1;
  	}
  	/* cover the last node */
  	sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
  	vmemmap_populate_print_last();
  }
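
/*
 * Note on the loop above: present sections are handed to sparse_init_nid()
 * in runs that share a node, so the usage maps and the memmap buffer can
 * be allocated once per node (mem_section_usage_size() * map_count and
 * map_count * section_map_size()) rather than once per section.
 */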

#ifdef CONFIG_MEMORY_HOTPLUG

/* Mark all memory sections within the pfn range as online */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/* onlining code should never touch invalid ranges */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map |= SECTION_IS_ONLINE;
	}
}
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/*
		 * TODO this needs some double checking. Offlining code makes
		 * sure to check pfn_valid but those checks might be just bogus
		 */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map &= ~SECTION_IS_ONLINE;
	}
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	return __populate_section_memmap(pfn, nr_pages, nid, altmap);
}

static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);

	vmemmap_free(start, end, altmap);
}

static void free_map_bootmem(struct page *memmap)
{
	unsigned long start = (unsigned long)memmap;
	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

	vmemmap_free(start, end, NULL);
}
#else
  struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	struct page *page, *ret;
	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;

	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
	if (page)
		goto got_map_page;

	ret = vmalloc(memmap_size);
	if (ret)
		goto got_map_ptr;

	return NULL;
got_map_page:
	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:

	return ret;
}

static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	struct page *memmap = pfn_to_page(pfn);

	if (is_vmalloc_addr(memmap))
		vfree(memmap);
	else
		free_pages((unsigned long)memmap,
			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
}

static void free_map_bootmem(struct page *memmap)
{
	unsigned long maps_section_nr, removing_section_nr, i;
	unsigned long magic, nr_pages;
	struct page *page = virt_to_page(memmap);

	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
		>> PAGE_SHIFT;

	for (i = 0; i < nr_pages; i++, page++) {
		magic = (unsigned long) page->freelist;

		BUG_ON(magic == NODE_INFO);

		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
		removing_section_nr = page_private(page);
  
  		/*
		 * When this function is called, the section being removed is
		 * in a logically offlined state, meaning all of its pages are
		 * isolated from the page allocator. If the memmap of the
		 * section being removed lives within that same section, it
		 * must not be freed here: the page allocator could hand it
		 * out shortly before the memory is physically removed.
  		 */
  		if (maps_section_nr != removing_section_nr)
  			put_page_bootmem(page);
  	}
  }
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

  static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
  		struct vmem_altmap *altmap)
  {
  	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
  	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
  	struct mem_section *ms = __pfn_to_section(pfn);
  	bool section_is_early = early_section(ms);
  	struct page *memmap = NULL;
	bool empty;
  	unsigned long *subsection_map = ms->usage
  		? &ms->usage->subsection_map[0] : NULL;
  
  	subsection_mask_set(map, pfn, nr_pages);
  	if (subsection_map)
  		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
  
	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
				"section already deactivated (%#lx + %ld)\n",
  				pfn, nr_pages))
  		return;
  
  	/*
  	 * There are 3 cases to handle across two configurations
  	 * (SPARSEMEM_VMEMMAP={y,n}):
  	 *
  	 * 1/ deactivation of a partial hot-added section (only possible
  	 * in the SPARSEMEM_VMEMMAP=y case).
  	 *    a/ section was present at memory init
  	 *    b/ section was hot-added post memory init
  	 * 2/ deactivation of a complete hot-added section
  	 * 3/ deactivation of a complete section from memory init
  	 *
	 * For 1/, when the subsection_map is not empty we will not be
  	 * freeing the usage map, but still need to free the vmemmap
  	 * range.
  	 *
  	 * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
  	 */
  	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
	empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
	if (empty) {
  		unsigned long section_nr = pfn_to_section_nr(pfn);
  		/*
  		 * When removing an early section, the usage map is kept (as the
  		 * usage maps of other sections fall into the same page). It
  		 * will be re-used when re-adding the section - which is then no
  		 * longer an early section. If the usage map is PageReserved, it
  		 * was allocated during boot.
  		 */
  		if (!PageReserved(virt_to_page(ms->usage))) {
  			kfree(ms->usage);
  			ms->usage = NULL;
  		}
  		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  		/*
  		 * Mark the section invalid so that valid_section()
		 * returns false. This prevents code from dereferencing
		 * the ms->usage array.
  		 */
  		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
  	}
  
  	if (section_is_early && memmap)
  		free_map_bootmem(memmap);
  	else
  		depopulate_section_memmap(pfn, nr_pages, altmap);
  
  	if (empty)
  		ms->section_mem_map = (unsigned long)NULL;
  }
  
  static struct page * __meminit section_activate(int nid, unsigned long pfn,
  		unsigned long nr_pages, struct vmem_altmap *altmap)
  {
  	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
  	struct mem_section *ms = __pfn_to_section(pfn);
  	struct mem_section_usage *usage = NULL;
  	unsigned long *subsection_map;
  	struct page *memmap;
  	int rc = 0;
  
  	subsection_mask_set(map, pfn, nr_pages);
  
  	if (!ms->usage) {
  		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
  		if (!usage)
  			return ERR_PTR(-ENOMEM);
  		ms->usage = usage;
  	}
  	subsection_map = &ms->usage->subsection_map[0];
  
  	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
  		rc = -EINVAL;
  	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
  		rc = -EEXIST;
  	else
  		bitmap_or(subsection_map, map, subsection_map,
  				SUBSECTIONS_PER_SECTION);
  
  	if (rc) {
  		if (usage)
  			ms->usage = NULL;
  		kfree(usage);
  		return ERR_PTR(rc);
  	}
  
  	/*
  	 * The early init code does not consider partially populated
  	 * initial sections, it simply assumes that memory will never be
  	 * referenced.  If we hot-add memory into such a section then we
  	 * do not need to populate the memmap and can simply reuse what
  	 * is already there.
  	 */
  	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
  		return pfn_to_page(pfn);
  
  	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
  	if (!memmap) {
  		section_deactivate(pfn, nr_pages, altmap);
  		return ERR_PTR(-ENOMEM);
  	}
  
  	return memmap;
  }
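
/*
 * Note: section_activate() returns the memmap to use on success and an
 * ERR_PTR() (-EINVAL, -EEXIST or -ENOMEM) on failure, so callers such as
 * sparse_add_section() below check it with IS_ERR()/PTR_ERR().  For a
 * partially populated early section it returns the existing
 * pfn_to_page(pfn) instead of allocating a new memmap.
 */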

/**
 * sparse_add_section - add a memory section, or populate an existing one
 * @nid: The node to add section on
 * @start_pfn: start pfn of the memory range
 * @nr_pages: number of pfns to add in the section
 * @altmap: device page map
 *
 * This is only intended for hotplug.
 *
 * Return:
 * * 0		- On success.
 * * -EEXIST	- Section is already present.
 * * -ENOMEM	- Out of memory.
 */
  int __meminit sparse_add_section(int nid, unsigned long start_pfn,
  		unsigned long nr_pages, struct vmem_altmap *altmap)
{
	unsigned long section_nr = pfn_to_section_nr(start_pfn);
	struct mem_section *ms;
	struct page *memmap;
	int ret;

	ret = sparse_index_init(section_nr, nid);
	if (ret < 0)
		return ret;

  	memmap = section_activate(nid, start_pfn, nr_pages, altmap);
  	if (IS_ERR(memmap))
  		return PTR_ERR(memmap);

	/*
	 * Poison uninitialized struct pages in order to catch invalid flags
	 * combinations.
	 */
	page_init_poison(memmap, sizeof(struct page) * nr_pages);

	ms = __nr_to_section(section_nr);
	set_section_nid(section_nr, nid);
	section_mark_present(ms);

  	/* Align memmap to section boundary in the subsection case */
  	if (section_nr_to_pfn(section_nr) != start_pfn)
  		memmap = pfn_to_kaddr(section_nr_to_pfn(section_nr));
  	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
  
  	return 0;
  }

#ifdef CONFIG_MEMORY_FAILURE
static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
	int i;

	/*
	 * A further optimization is to have per section refcounted
	 * num_poisoned_pages.  But that would need more space per memmap, so
	 * for now just do a quick global check to speed up this routine in the
	 * absence of bad pages.
	 */
	if (atomic_long_read(&num_poisoned_pages) == 0)
		return;

  	for (i = 0; i < nr_pages; i++) {
		if (PageHWPoison(&memmap[i])) {
  			num_poisoned_pages_dec();
  			ClearPageHWPoison(&memmap[i]);
  		}
  	}
  }
  #else
  static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
  {
  }
  #endif

void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
		unsigned long nr_pages, unsigned long map_offset,
		struct vmem_altmap *altmap)
{
	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
			nr_pages - map_offset);
	section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */