mm/sparse.c 26.4 KB

// SPDX-License-Identifier: GPL-2.0
/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include "internal.h"
#include <asm/dma.h>
  
  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

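/*
 * With SPARSEMEM_EXTREME the table above is two-level: a section number is
 * split into a root index (section_nr / SECTIONS_PER_ROOT) and a slot within
 * that root (section_nr % SECTIONS_PER_ROOT), and a root array is only
 * allocated once a section in its range becomes present.  As a rough,
 * configuration-dependent illustration: if SECTIONS_PER_ROOT happened to be
 * 256, section 70000 would be found at mem_section[273][112].
 */
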
  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
int page_to_nid(const struct page *page)
{
	return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

  static void set_section_nid(unsigned long section_nr, int nid)
  {
  	section_to_node_table[section_nr] = nid;
  }
  #else /* !NODE_NOT_IN_PAGE_FLAGS */
  static inline void set_section_nid(unsigned long section_nr, int nid)
  {
  }
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
static noinline struct mem_section __ref *sparse_index_alloc(int nid)
  {
  	struct mem_section *section = NULL;
  	unsigned long array_size = SECTIONS_PER_ROOT *
  				   sizeof(struct mem_section);
	if (slab_is_available()) {
		section = kzalloc_node(array_size, GFP_KERNEL, nid);
	} else {
		section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
					      nid);
		if (!section)
			panic("%s: Failed to allocate %lu bytes nid=%d\n",
			      __func__, array_size, nid);
	}

	return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
	struct mem_section *section;

	/*
	 * An existing section is possible in the sub-section hotplug
	 * case. First hot-add instantiates, follow-on hot-add reuses
	 * the existing section.
	 *
	 * The mem_hotplug_lock resolves the apparent race below.
	 */
	if (mem_section[root])
		return 0;

	section = sparse_index_alloc(nid);
	if (!section)
		return -ENOMEM;

	mem_section[root] = section;

	return 0;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	return 0;
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
unsigned long __section_nr(struct mem_section *ms)
{
	unsigned long root_nr;
	struct mem_section *root = NULL;

	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
		if (!root)
			continue;

		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
		     break;
	}

	VM_BUG_ON(!root);

	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
unsigned long __section_nr(struct mem_section *ms)
{
	return (unsigned long)(ms - mem_section[0]);
}
#endif

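/*
 * __section_nr() is the inverse of __nr_to_section(): given a pointer into
 * one of the root arrays it recovers the section number by scanning the
 * roots for the one that contains the pointer.  In the !SPARSEMEM_EXTREME
 * case the table is one flat array, so plain pointer arithmetic against
 * mem_section[0] is enough.
 */
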
  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	return (nid << SECTION_NID_SHIFT);
  }
  
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	return (section->section_mem_map >> SECTION_NID_SHIFT);
  }
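
/*
 * For illustration: sparse_encode_early_nid(2) stores 2 << SECTION_NID_SHIFT
 * in section_mem_map, and sparse_early_nid() shifts it back down.  The node
 * information is only meaningful during early boot; it is replaced as soon
 * as sparse_init_one_section() installs the real (encoded) mem_map for the
 * section.
 */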

/* Validate the physical addressing limitations of the model */
void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
						unsigned long *end_pfn)
{
	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

	/*
	 * Sanity checks - do not allow an architecture to pass
	 * in larger pfns than the maximum scope of sparsemem:
	 */
	if (*start_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*start_pfn = max_sparsemem_pfn;
		*end_pfn = max_sparsemem_pfn;
	} else if (*end_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*end_pfn = max_sparsemem_pfn;
	}
}
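
/*
 * For example (the exact values are configuration dependent): with
 * MAX_PHYSMEM_BITS == 46 and a 4 KiB PAGE_SIZE (PAGE_SHIFT == 12),
 * max_sparsemem_pfn is 1UL << 34, i.e. 64 TiB of physical address space;
 * any range passed in beyond that is clamped and a warning is printed.
 */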

/*
 * There are a number of times that we loop over NR_MEM_SECTIONS,
 * looking for section_present() on each.  But, when we have very
 * large physical address spaces, NR_MEM_SECTIONS can also be
 * very large which makes the loops quite long.
 *
 * Keeping track of this gives us an easy way to break out of
 * those loops early.
 */
unsigned long __highest_present_section_nr;
static void section_mark_present(struct mem_section *ms)
{
	unsigned long section_nr = __section_nr(ms);

	if (section_nr > __highest_present_section_nr)
		__highest_present_section_nr = section_nr;

	ms->section_mem_map |= SECTION_MARKED_PRESENT;
}

#define for_each_present_section_nr(start, section_nr)		\
	for (section_nr = next_present_section_nr(start-1);	\
	     ((section_nr != -1) &&				\
	      (section_nr <= __highest_present_section_nr));	\
	     section_nr = next_present_section_nr(section_nr))

static inline unsigned long first_present_section_nr(void)
{
	return next_present_section_nr(-1);
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
		unsigned long nr_pages)
{
	int idx = subsection_map_index(pfn);
	int end = subsection_map_index(pfn + nr_pages - 1);

	bitmap_set(map, idx, end - idx + 1);
}

void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
	int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
	unsigned long nr, start_sec = pfn_to_section_nr(pfn);

	if (!nr_pages)
		return;

	for (nr = start_sec; nr <= end_sec; nr++) {
		struct mem_section *ms;
		unsigned long pfns;

		pfns = min(nr_pages, PAGES_PER_SECTION
				- (pfn & ~PAGE_SECTION_MASK));
		ms = __nr_to_section(nr);
		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);

		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
				pfns, subsection_map_index(pfn),
				subsection_map_index(pfn + pfns - 1));

		pfn += pfns;
		nr_pages -= pfns;
	}
}
#else
void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
}
#endif
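
/*
 * Worked example (values depend on the architecture): with 128 MiB sections
 * and 2 MiB, PMD-sized sub-sections - the usual x86_64 layout - each
 * subsection_map has 64 bits.  Hot-adding a 4 MiB range that starts 4 MiB
 * into a section therefore sets exactly bits 2 and 3 of that section's map.
 */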

/* Record a memory area against a node. */
static void __init memory_present(int nid, unsigned long start, unsigned long end)
{
	unsigned long pfn;

#ifdef CONFIG_SPARSEMEM_EXTREME
	if (unlikely(!mem_section)) {
		unsigned long size, align;

		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
		align = 1 << (INTERNODE_CACHE_SHIFT);
		mem_section = memblock_alloc(size, align);
		if (!mem_section)
			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
			      __func__, size, align);
	}
#endif

	start &= PAGE_SECTION_MASK;
	mminit_validate_memmodel_limits(&start, &end);
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
		unsigned long section = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		sparse_index_init(section, nid);
		set_section_nid(section, nid);

		ms = __nr_to_section(section);
		if (!ms->section_mem_map) {
			ms->section_mem_map = sparse_encode_early_nid(nid) |
							SECTION_IS_ONLINE;
			section_mark_present(ms);
		}
	}
}
  
/*
 * Mark all memblocks as present using memory_present().
 * This is a convenience function that is useful to mark all of the system's
 * memory as present during initialization.
 */
static void __init memblocks_present(void)
{
	unsigned long start, end;
	int i, nid;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
		memory_present(nid, start, end);
}
  
/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
	unsigned long coded_mem_map =
		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
	return coded_mem_map;
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Decode mem_map from the coded memmap
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
	/* mask off the extra low bits of information */
	coded_mem_map &= SECTION_MAP_MASK;
	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
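
/*
 * Worked example of the encoding: if a section's mem_map starts at virtual
 * address M and the section covers pfns [S, S + PAGES_PER_SECTION), the
 * stored value is M - S (in units of struct page).  The struct page for a
 * pfn P in that section sits at M + (P - S), so "page - section_mem_map"
 * evaluates to (M + P - S) - (M - S) = P: exactly the identity that
 * page_to_pfn() relies on.  sparse_decode_mem_map() simply adds S back to
 * recover M.
 */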

static void __meminit sparse_init_one_section(struct mem_section *ms,
		unsigned long pnum, struct page *mem_map,
		struct mem_section_usage *usage, unsigned long flags)
{
	ms->section_mem_map &= ~SECTION_MAP_MASK;
	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
		| SECTION_HAS_MEM_MAP | flags;
	ms->usage = usage;
}
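
/*
 * The encoded mem_map pointer and the section state share one word: the
 * pointer occupies the bits covered by SECTION_MAP_MASK, while flags such
 * as SECTION_MARKED_PRESENT, SECTION_HAS_MEM_MAP, SECTION_IS_ONLINE and
 * SECTION_IS_EARLY live in the low bits outside it.  That is why
 * sparse_init_one_section() first clears only the SECTION_MAP_MASK bits
 * (preserving the existing flags) before ORing in the new map and flags.
 */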

static unsigned long usemap_size(void)
{
	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}

size_t mem_section_usage_size(void)
{
	return sizeof(struct mem_section_usage) + usemap_size();
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	struct mem_section_usage *usage;
	unsigned long goal, limit;
	int nid;
	/*
	 * A page may contain usemaps for other sections preventing the
	 * page being freed and making a section unremovable while
	 * other sections referencing the usemap remain active. Similarly,
	 * a pgdat can prevent a section being removed. If section A
	 * contains a pgdat and section B contains the usemap, both
	 * sections become inter-dependent. This allocates usemaps
	 * from the same section as the pgdat where possible to avoid
	 * this problem.
	 */
	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
	limit = goal + (1UL << PA_SECTION_SHIFT);
	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
	usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
	if (!usage && limit) {
		limit = 0;
		goto again;
	}
	return usage;
}

static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
	unsigned long usemap_snr, pgdat_snr;
	static unsigned long old_usemap_snr;
	static unsigned long old_pgdat_snr;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int usemap_nid;

	/* First call */
	if (!old_usemap_snr) {
		old_usemap_snr = NR_MEM_SECTIONS;
		old_pgdat_snr = NR_MEM_SECTIONS;
	}

	usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
	if (usemap_snr == pgdat_snr)
		return;

	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
		/* skip redundant message */
		return;

	old_usemap_snr = usemap_snr;
	old_pgdat_snr = pgdat_snr;

	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
	if (usemap_nid != nid) {
		pr_info("node %d must be removed before remove section %ld\n",
			nid, usemap_snr);
		return;
	}
	/*
	 * There is a circular dependency.
	 * Some platforms allow un-removable sections because they will just
	 * gather other removable sections for dynamic partitioning.
	 * Just notify the un-removable section's number here.
	 */
	pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
		usemap_snr, pgdat_snr, nid);
}
#else
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
}

static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static unsigned long __init section_map_size(void)
{
	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}

#else
static unsigned long __init section_map_size(void)
{
	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}

struct page __init *__populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long size = section_map_size();
	struct page *map = sparse_buffer_alloc(size);
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	if (map)
		return map;

	map = memblock_alloc_try_nid_raw(size, size, addr,
					  MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	if (!map)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
		      __func__, size, PAGE_SIZE, nid, &addr);

	return map;
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
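
/*
 * Rough sizing, assuming the common x86_64 values of a 64-byte struct page
 * and 32768 pages per section: one section's memmap is 32768 * 64 bytes =
 * 2 MiB.  With SPARSEMEM_VMEMMAP the size is additionally rounded up to
 * PMD_SIZE so the virtual memmap can be backed by huge page mappings.
 */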

static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;

static inline void __meminit sparse_buffer_free(unsigned long size)
{
	WARN_ON(!sparsemap_buf || size == 0);
	memblock_free_early(__pa(sparsemap_buf), size);
}

static void __init sparse_buffer_init(unsigned long size, int nid)
{
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
	/*
	 * Pre-allocated buffer is mainly used by __populate_section_memmap
	 * and we want it to be properly aligned to the section size - this is
	 * especially the case for VMEMMAP which maps memmap to PMDs
	 */
	sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(),
					addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	sparsemap_buf_end = sparsemap_buf + size;
}

static void __init sparse_buffer_fini(void)
{
	unsigned long size = sparsemap_buf_end - sparsemap_buf;

	if (sparsemap_buf && size > 0)
		sparse_buffer_free(size);
	sparsemap_buf = NULL;
}

void * __meminit sparse_buffer_alloc(unsigned long size)
{
	void *ptr = NULL;

	if (sparsemap_buf) {
		ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
		if (ptr + size > sparsemap_buf_end)
			ptr = NULL;
		else {
			/* Free redundant aligned space */
			if ((unsigned long)(ptr - sparsemap_buf) > 0)
				sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
			sparsemap_buf = ptr + size;
		}
	}
	return ptr;
}
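
/*
 * Typical early-boot use of the buffer helpers above (see sparse_init_nid()
 * below): sparse_buffer_init() grabs one large per-node allocation sized for
 * all of the node's memmaps, each __populate_section_memmap() call carves an
 * aligned slice out of it with sparse_buffer_alloc(), and whatever is left
 * over is handed back to memblock by sparse_buffer_fini().  Alignment gaps
 * created by the roundup() are returned immediately via sparse_buffer_free().
 */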

void __weak __meminit vmemmap_populate_print_last(void)
{
}

/*
 * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
 * and the number of present sections in this node is map_count.
 */
static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
				   unsigned long pnum_end,
				   unsigned long map_count)
{
	struct mem_section_usage *usage;
	unsigned long pnum;
	struct page *map;

	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
			mem_section_usage_size() * map_count);
	if (!usage) {
		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
		goto failed;
	}
	sparse_buffer_init(map_count * section_map_size(), nid);
	for_each_present_section_nr(pnum_begin, pnum) {
		unsigned long pfn = section_nr_to_pfn(pnum);

		if (pnum >= pnum_end)
			break;

		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
				nid, NULL);
		if (!map) {
			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
			       __func__, nid);
			pnum_begin = pnum;
			goto failed;
		}
		check_usemap_section_nr(nid, usage);
		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
				SECTION_IS_EARLY);
		usage = (void *) usage + mem_section_usage_size();
	}
	sparse_buffer_fini();
	return;
failed:
	/* We failed to allocate, mark all the following pnums as not present */
	for_each_present_section_nr(pnum_begin, pnum) {
		struct mem_section *ms;

		if (pnum >= pnum_end)
			break;
		ms = __nr_to_section(pnum);
		ms->section_mem_map = 0;
	}
}
  
  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   */
void __init sparse_init(void)
{
	unsigned long pnum_end, pnum_begin, map_count = 1;
	int nid_begin;

	memblocks_present();

	pnum_begin = first_present_section_nr();
	nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));

	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
	set_pageblock_order();

	for_each_present_section_nr(pnum_begin + 1, pnum_end) {
		int nid = sparse_early_nid(__nr_to_section(pnum_end));

		if (nid == nid_begin) {
			map_count++;
			continue;
		}
		/* Init node with sections in range [pnum_begin, pnum_end) */
		sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
		nid_begin = nid;
		pnum_begin = pnum_end;
		map_count = 1;
	}
	/* cover the last node */
	sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
	vmemmap_populate_print_last();
}
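
/*
 * Boot-time flow in short: memblocks_present() marks every memblock range as
 * present, the loop above then walks the present sections in order, batching
 * consecutive sections that belong to the same node, and each batch is handed
 * to sparse_init_nid(), which allocates the usage maps and memmaps for that
 * node in one go.
 */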

#ifdef CONFIG_MEMORY_HOTPLUG

/* Mark all memory sections within the pfn range as online */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/* onlining code should never touch invalid ranges */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map |= SECTION_IS_ONLINE;
	}
}

#ifdef CONFIG_MEMORY_HOTREMOVE
/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/*
		 * TODO this needs some double checking. Offlining code makes
		 * sure to check pfn_valid but those checks might be just bogus
		 */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map &= ~SECTION_IS_ONLINE;
	}
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	return __populate_section_memmap(pfn, nr_pages, nid, altmap);
}

static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);

	vmemmap_free(start, end, altmap);
}

static void free_map_bootmem(struct page *memmap)
{
	unsigned long start = (unsigned long)memmap;
	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

	vmemmap_free(start, end, NULL);
}
  
  static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
  {
  	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
  	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
  	struct mem_section *ms = __pfn_to_section(pfn);
  	unsigned long *subsection_map = ms->usage
  		? &ms->usage->subsection_map[0] : NULL;
  
  	subsection_mask_set(map, pfn, nr_pages);
  	if (subsection_map)
  		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
  
	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
				"section already deactivated (%#lx + %ld)\n",
				pfn, nr_pages))
  		return -EINVAL;
  
  	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
  	return 0;
  }
  
  static bool is_subsection_map_empty(struct mem_section *ms)
  {
  	return bitmap_empty(&ms->usage->subsection_map[0],
  			    SUBSECTIONS_PER_SECTION);
  }
  
  static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
  {
  	struct mem_section *ms = __pfn_to_section(pfn);
  	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
  	unsigned long *subsection_map;
  	int rc = 0;
  
  	subsection_mask_set(map, pfn, nr_pages);
  
  	subsection_map = &ms->usage->subsection_map[0];
  
  	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
  		rc = -EINVAL;
  	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
  		rc = -EEXIST;
  	else
  		bitmap_or(subsection_map, map, subsection_map,
  				SUBSECTIONS_PER_SECTION);
  
  	return rc;
  }
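
/*
 * The three helpers above back sub-section hotplug, which (as noted in the
 * comment before section_deactivate()) is only available with
 * SPARSEMEM_VMEMMAP: fill_subsection_map() records which sub-sections of a
 * section have been activated, clear_subsection_map() drops them again on
 * deactivation, and is_subsection_map_empty() tells section_deactivate()
 * whether the whole section can finally be torn down.
 */
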
#else
struct page * __meminit populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	return kvmalloc_node(array_size(sizeof(struct page),
					PAGES_PER_SECTION), GFP_KERNEL, nid);
}

static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
		struct vmem_altmap *altmap)
{
	kvfree(pfn_to_page(pfn));
}

static void free_map_bootmem(struct page *memmap)
{
	unsigned long maps_section_nr, removing_section_nr, i;
	unsigned long magic, nr_pages;
	struct page *page = virt_to_page(memmap);

	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
		>> PAGE_SHIFT;

	for (i = 0; i < nr_pages; i++, page++) {
		magic = (unsigned long) page->freelist;

		BUG_ON(magic == NODE_INFO);

		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
		removing_section_nr = page_private(page);

		/*
		 * When this function is called, the section being removed is
		 * in a logically offlined state, i.e. all of its pages are
		 * isolated from the page allocator. If the memmap of the
		 * section being removed lives in that same section, it must
		 * not be freed here; otherwise the page allocator could hand
		 * it out again just before the memory is removed physically.
		 */
		if (maps_section_nr != removing_section_nr)
			put_page_bootmem(page);
	}
}

static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	return 0;
}

static bool is_subsection_map_empty(struct mem_section *ms)
{
	return true;
}

static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
	return 0;
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

  /*
   * To deactivate a memory region, there are 3 cases to handle across
   * two configurations (SPARSEMEM_VMEMMAP={y,n}):
   *
   * 1. deactivation of a partial hot-added section (only possible in
   *    the SPARSEMEM_VMEMMAP=y case).
   *      a) section was present at memory init.
   *      b) section was hot-added post memory init.
   * 2. deactivation of a complete hot-added section.
   * 3. deactivation of a complete section from memory init.
   *
 * For 1, if the subsection_map is not empty when we are done, we will not
 * free the usage map, but we still need to free the vmemmap range.
 *
 * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified.
   */
  static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
  		struct vmem_altmap *altmap)
  {
  	struct mem_section *ms = __pfn_to_section(pfn);
  	bool section_is_early = early_section(ms);
  	struct page *memmap = NULL;
  	bool empty;
  
  	if (clear_subsection_map(pfn, nr_pages))
  		return;

	empty = is_subsection_map_empty(ms);
	if (empty) {
		unsigned long section_nr = pfn_to_section_nr(pfn);

		/*
		 * When removing an early section, the usage map is kept (as the
		 * usage maps of other sections fall into the same page). It
		 * will be re-used when re-adding the section - which is then no
		 * longer an early section. If the usage map is PageReserved, it
		 * was allocated during boot.
		 */
		if (!PageReserved(virt_to_page(ms->usage))) {
			kfree(ms->usage);
			ms->usage = NULL;
		}
		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
		/*
		 * Mark the section invalid so that valid_section()
		 * returns false. This prevents code from dereferencing
		 * the ms->usage array.
		 */
		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
	}

	/*
	 * The memmap of early sections is always fully populated. See
	 * section_activate() and pfn_valid().
	 */
	if (!section_is_early)
		depopulate_section_memmap(pfn, nr_pages, altmap);
	else if (memmap)
		free_map_bootmem(memmap);

	if (empty)
		ms->section_mem_map = (unsigned long)NULL;
}

  static struct page * __meminit section_activate(int nid, unsigned long pfn,
  		unsigned long nr_pages, struct vmem_altmap *altmap)
  {
  	struct mem_section *ms = __pfn_to_section(pfn);
  	struct mem_section_usage *usage = NULL;
  	struct page *memmap;
  	int rc = 0;
  
  	if (!ms->usage) {
  		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
  		if (!usage)
  			return ERR_PTR(-ENOMEM);
  		ms->usage = usage;
  	}
  
  	rc = fill_subsection_map(pfn, nr_pages);
  	if (rc) {
  		if (usage)
  			ms->usage = NULL;
  		kfree(usage);
  		return ERR_PTR(rc);
  	}
  
  	/*
  	 * The early init code does not consider partially populated
  	 * initial sections, it simply assumes that memory will never be
  	 * referenced.  If we hot-add memory into such a section then we
  	 * do not need to populate the memmap and can simply reuse what
  	 * is already there.
  	 */
  	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
  		return pfn_to_page(pfn);
  
  	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
  	if (!memmap) {
  		section_deactivate(pfn, nr_pages, altmap);
  		return ERR_PTR(-ENOMEM);
  	}
  
  	return memmap;
  }

/**
 * sparse_add_section - add a memory section, or populate an existing one
 * @nid: The node to add section on
 * @start_pfn: start pfn of the memory range
 * @nr_pages: number of pfns to add in the section
 * @altmap: device page map
 *
 * This is only intended for hotplug.
 *
 * Note that only VMEMMAP supports sub-section aligned hotplug,
 * the proper alignment and size are gated by check_pfn_span().
 *
 * Return:
 * * 0		- On success.
 * * -EEXIST	- Section was already present.
 * * -ENOMEM	- Out of memory.
 */
int __meminit sparse_add_section(int nid, unsigned long start_pfn,
		unsigned long nr_pages, struct vmem_altmap *altmap)
{
	unsigned long section_nr = pfn_to_section_nr(start_pfn);
	struct mem_section *ms;
	struct page *memmap;
	int ret;

	ret = sparse_index_init(section_nr, nid);
	if (ret < 0)
		return ret;

	memmap = section_activate(nid, start_pfn, nr_pages, altmap);
	if (IS_ERR(memmap))
		return PTR_ERR(memmap);

	/*
	 * Poison uninitialized struct pages in order to catch invalid flags
	 * combinations.
	 */
	page_init_poison(memmap, sizeof(struct page) * nr_pages);

	ms = __nr_to_section(section_nr);
	set_section_nid(section_nr, nid);
	section_mark_present(ms);

	/* Align memmap to section boundary in the subsection case */
	if (section_nr_to_pfn(section_nr) != start_pfn)
		memmap = pfn_to_page(section_nr_to_pfn(section_nr));
	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);

	return 0;
}
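
/*
 * Hot-add in short: sparse_add_section() ensures the root for the section
 * exists (sparse_index_init()), section_activate() allocates the usage map
 * if needed, records the requested sub-sections where sub-section hotplug is
 * supported, and populates the memmap; the new struct pages are poisoned to
 * catch use before initialisation, and the section is finally marked present
 * and wired up by sparse_init_one_section().  sparse_remove_section() below
 * is the mirror image built around section_deactivate().
 */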

#ifdef CONFIG_MEMORY_FAILURE
static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
	int i;

	/*
	 * A further optimization is to have per section refcounted
	 * num_poisoned_pages.  But that would need more space per memmap, so
	 * for now just do a quick global check to speed up this routine in the
	 * absence of bad pages.
	 */
	if (atomic_long_read(&num_poisoned_pages) == 0)
		return;

	for (i = 0; i < nr_pages; i++) {
		if (PageHWPoison(&memmap[i])) {
			num_poisoned_pages_dec();
			ClearPageHWPoison(&memmap[i]);
		}
	}
}
#else
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
}
#endif

void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
		unsigned long nr_pages, unsigned long map_offset,
		struct vmem_altmap *altmap)
{
	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
			nr_pages - map_offset);
	section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */