mm/sparse.c

  // SPDX-License-Identifier: GPL-2.0
  /*
   * sparse memory mappings.
   */
  #include <linux/mm.h>
  #include <linux/slab.h>
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
  #include <linux/compiler.h>
  #include <linux/highmem.h>
  #include <linux/export.h>
  #include <linux/spinlock.h>
  #include <linux/vmalloc.h>

  #include "internal.h"
  #include <asm/dma.h>
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>
  
  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
  #ifdef CONFIG_SPARSEMEM_EXTREME
  struct mem_section **mem_section;
  #else
  struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
  	____cacheline_internodealigned_in_smp;
  #endif
  EXPORT_SYMBOL(mem_section);
  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
  int page_to_nid(const struct page *page)
  {
  	return section_to_node_table[page_to_section(page)];
  }
  EXPORT_SYMBOL(page_to_nid);
  
  static void set_section_nid(unsigned long section_nr, int nid)
  {
  	section_to_node_table[section_nr] = nid;
  }
  #else /* !NODE_NOT_IN_PAGE_FLAGS */
  static inline void set_section_nid(unsigned long section_nr, int nid)
  {
  }
  #endif
  #ifdef CONFIG_SPARSEMEM_EXTREME
  static noinline struct mem_section __ref *sparse_index_alloc(int nid)
  {
  	struct mem_section *section = NULL;
  	unsigned long array_size = SECTIONS_PER_ROOT *
  				   sizeof(struct mem_section);
  	if (slab_is_available())
  		section = kzalloc_node(array_size, GFP_KERNEL, nid);
  	else
  		section = memblock_virt_alloc_node(array_size, nid);
  
  	return section;
  }

  static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  {
  	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  	struct mem_section *section;
  
  	if (mem_section[root])
  		return -EEXIST;

  	section = sparse_index_alloc(nid);
  	if (!section)
  		return -ENOMEM;
  
  	mem_section[root] = section;

  	return 0;
  }
  #else /* !SPARSEMEM_EXTREME */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  	return 0;
  }
  #endif
  #ifdef CONFIG_SPARSEMEM_EXTREME
  int __section_nr(struct mem_section* ms)
  {
  	unsigned long root_nr;
  	struct mem_section *root = NULL;

  	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
  		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
  		if (!root)
  			continue;
  
  		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
  		     break;
  	}
  	VM_BUG_ON(!root);

  	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
  }
  #else
  int __section_nr(struct mem_section* ms)
  {
  	return (int)(ms - mem_section[0]);
  }
  #endif
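
/*
 * Illustrative sketch, not part of the original file: __section_nr() is
 * the inverse of __nr_to_section(), so for any section number whose root
 * has already been allocated the round trip is an identity:
 *
 *	struct mem_section *ms = __nr_to_section(nr);
 *
 *	VM_BUG_ON(__section_nr(ms) != nr);
 *
 * With SPARSEMEM_EXTREME this costs a linear scan over NR_SECTION_ROOTS,
 * which is why the result is worth keeping around once computed.
 */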

  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	return (nid << SECTION_NID_SHIFT);
  }
  
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	return (section->section_mem_map >> SECTION_NID_SHIFT);
  }
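
/*
 * Illustrative sketch, not part of the original file: as long as no real
 * mem_map has been stored, the two helpers above are inverses, which is
 * how memory_present() below stashes the node and how the early boot code
 * later reads it back:
 *
 *	ms->section_mem_map = sparse_encode_early_nid(nid);
 *	...
 *	WARN_ON(sparse_early_nid(ms) != nid);
 *
 * Once sparse_init_one_section() stores the real mem_map encoding, the
 * node information is gone and must not be read back this way.
 */
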
  /* Validate the physical addressing limitations of the model */
  void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  						unsigned long *end_pfn)
  {
  	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

  	/*
  	 * Sanity checks - do not allow an architecture to pass
  	 * in larger pfns than the maximum scope of sparsemem:
  	 */
  	if (*start_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu
  ",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*start_pfn = max_sparsemem_pfn;
  		*end_pfn = max_sparsemem_pfn;
  	} else if (*end_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"End of range %lu -> %lu exceeds SPARSEMEM max %lu
  ",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*end_pfn = max_sparsemem_pfn;
  	}
  }
  /*
   * There are a number of times that we loop over NR_MEM_SECTIONS,
   * looking for section_present() on each.  But, when we have very
   * large physical address spaces, NR_MEM_SECTIONS can also be
   * very large which makes the loops quite long.
   *
   * Keeping track of this gives us an easy way to break out of
   * those loops early.
   */
  int __highest_present_section_nr;
  static void section_mark_present(struct mem_section *ms)
  {
  	int section_nr = __section_nr(ms);
  
  	if (section_nr > __highest_present_section_nr)
  		__highest_present_section_nr = section_nr;
  
  	ms->section_mem_map |= SECTION_MARKED_PRESENT;
  }
  
  static inline int next_present_section_nr(int section_nr)
  {
  	do {
  		section_nr++;
  		if (present_section_nr(section_nr))
  			return section_nr;
  	} while ((section_nr < NR_MEM_SECTIONS) &&
  		 (section_nr <= __highest_present_section_nr));
  
  	return -1;
  }
  #define for_each_present_section_nr(start, section_nr)		\
  	for (section_nr = next_present_section_nr(start-1);	\
  	     ((section_nr >= 0) &&				\
  	      (section_nr < NR_MEM_SECTIONS) &&			\
  	      (section_nr <= __highest_present_section_nr));	\
  	     section_nr = next_present_section_nr(section_nr))
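
/*
 * Illustrative sketch, not part of the original file: the helper and the
 * macro above let a caller visit only the present sections and stop at
 * __highest_present_section_nr instead of walking all of NR_MEM_SECTIONS:
 *
 *	unsigned long pnum, nr_present = 0;
 *
 *	for_each_present_section_nr(0, pnum)
 *		nr_present++;
 *
 * alloc_usemap_and_memmap() and sparse_init() below iterate this way.
 */
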
  /* Record a memory area against a node. */
  void __init memory_present(int nid, unsigned long start, unsigned long end)
  {
  	unsigned long pfn;

  #ifdef CONFIG_SPARSEMEM_EXTREME
  	if (unlikely(!mem_section)) {
  		unsigned long size, align;
  		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
  		align = 1 << (INTERNODE_CACHE_SHIFT);
  		mem_section = memblock_virt_alloc(size, align);
  	}
  #endif
  	start &= PAGE_SECTION_MASK;
  	mminit_validate_memmodel_limits(&start, &end);
  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
  		unsigned long section = pfn_to_section_nr(pfn);
  		struct mem_section *ms;
  
  		sparse_index_init(section, nid);
  		set_section_nid(section, nid);
  
  		ms = __nr_to_section(section);
  		if (!ms->section_mem_map) {
  			ms->section_mem_map = sparse_encode_early_nid(nid) |
  							SECTION_IS_ONLINE;
  			section_mark_present(ms);
  		}
  	}
  }
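
/*
 * Illustrative usage sketch (an assumption, not part of this file): an
 * architecture typically registers each of its memblock regions against
 * its node before building the mem_maps, along the lines of:
 *
 *	struct memblock_region *reg;
 *
 *	for_each_memblock(memory, reg)
 *		memory_present(memblock_get_region_node(reg),
 *			       memblock_region_memory_base_pfn(reg),
 *			       memblock_region_memory_end_pfn(reg));
 *	sparse_init();
 */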
  
  /*
 * Only used by the i386 NUMA architectures, but relatively
   * generic code.
   */
  unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
  						     unsigned long end_pfn)
  {
  	unsigned long pfn;
  	unsigned long nr_pages = 0;
  	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		if (nid != early_pfn_to_nid(pfn))
  			continue;
  		if (pfn_present(pfn))
  			nr_pages += PAGES_PER_SECTION;
  	}
  
  	return nr_pages * sizeof(struct page);
  }
  
  /*
   * Subtle, we encode the real pfn into the mem_map such that
   * the identity pfn - section_mem_map will return the actual
   * physical page frame number.
   */
  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
  {
  	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
  }
  
  /*
   * Decode mem_map from the coded memmap
   */
  struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
  {
  	/* mask off the extra low bits of information */
  	coded_mem_map &= SECTION_MAP_MASK;
  	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
  }
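
/*
 * Illustrative sketch, not part of the original file: for a given section
 * number the two helpers above are inverses, and the offset encoding is
 * what lets a pfn be turned into its mem_map entry with a plain addition:
 *
 *	unsigned long coded = sparse_encode_mem_map(mem_map, pnum);
 *
 *	VM_BUG_ON(sparse_decode_mem_map(coded, pnum) != mem_map);
 *
 * and, for any pfn that falls inside section pnum,
 *
 *	(struct page *)coded + pfn == &mem_map[pfn - section_nr_to_pfn(pnum)]
 *
 * which is how __pfn_to_page() resolves a pfn for classic SPARSEMEM.
 */
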
  static int __meminit sparse_init_one_section(struct mem_section *ms,
  		unsigned long pnum, struct page *mem_map,
  		unsigned long *pageblock_bitmap)
  {
  	if (!present_section(ms))
  		return -EINVAL;
  	ms->section_mem_map &= ~SECTION_MAP_MASK;
  	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
  							SECTION_HAS_MEM_MAP;
   	ms->pageblock_flags = pageblock_bitmap;
  
  	return 1;
  }
  unsigned long usemap_size(void)
  {
  	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  static unsigned long *__kmalloc_section_usemap(void)
  {
  	return kmalloc(usemap_size(), GFP_KERNEL);
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static unsigned long * __init
  sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
  					 unsigned long size)
  {
  	unsigned long goal, limit;
  	unsigned long *p;
  	int nid;
  	/*
  	 * A page may contain usemaps for other sections preventing the
  	 * page being freed and making a section unremovable while
  	 * other sections referencing the usemap remain active. Similarly,
  	 * a pgdat can prevent a section being removed. If section A
  	 * contains a pgdat and section B contains the usemap, both
  	 * sections become inter-dependent. This allocates usemaps
  	 * from the same section as the pgdat where possible to avoid
  	 * this problem.
  	 */
  	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
  	limit = goal + (1UL << PA_SECTION_SHIFT);
  	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
  again:
  	p = memblock_virt_alloc_try_nid_nopanic(size,
  						SMP_CACHE_BYTES, goal, limit,
  						nid);
  	if (!p && limit) {
  		limit = 0;
  		goto again;
  	}
  	return p;
  }
  
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  	unsigned long usemap_snr, pgdat_snr;
  	static unsigned long old_usemap_snr;
  	static unsigned long old_pgdat_snr;
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int usemap_nid;
  	/* First call */
  	if (!old_usemap_snr) {
  		old_usemap_snr = NR_MEM_SECTIONS;
  		old_pgdat_snr = NR_MEM_SECTIONS;
  	}
  	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	if (usemap_snr == pgdat_snr)
  		return;
  
  	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
  		/* skip redundant message */
  		return;
  
  	old_usemap_snr = usemap_snr;
  	old_pgdat_snr = pgdat_snr;
  
  	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
  	if (usemap_nid != nid) {
  		pr_info("node %d must be removed before remove section %ld
  ",
  			nid, usemap_snr);
  		return;
  	}
  	/*
  	 * There is a circular dependency.
	 * Some platforms allow un-removable sections because they will just
	 * gather other removable sections for dynamic partitioning.
	 * Just notify the un-removable section's number here.
  	 */
  	pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations
  ",
  		usemap_snr, pgdat_snr, nid);
  }
  #else
  static unsigned long * __init
  sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
  					 unsigned long size)
  {
  	return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
  }
  
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  static void __init sparse_early_usemaps_alloc_node(void *data,
  				 unsigned long pnum_begin,
  				 unsigned long pnum_end,
  				 unsigned long usemap_count, int nodeid)
  {
  	void *usemap;
  	unsigned long pnum;
  	unsigned long **usemap_map = (unsigned long **)data;
  	int size = usemap_size();

  	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
  							  size * usemap_count);
  	if (!usemap) {
  		pr_warn("%s: allocation failed
  ", __func__);
  		return;
  	}
  	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;
  		usemap_map[pnum] = usemap;
  		usemap += size;
  		check_usemap_section_nr(nodeid, usemap_map[pnum]);
  	}
  }
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
  struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
  {
  	struct page *map;
  	unsigned long size;
  
  	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
  	if (map)
  		return map;
  	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
  	map = memblock_virt_alloc_try_nid(size,
  					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
  					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
  	return map;
  }
  void __init sparse_mem_maps_populate_node(struct page **map_map,
  					  unsigned long pnum_begin,
  					  unsigned long pnum_end,
  					  unsigned long map_count, int nodeid)
  {
  	void *map;
  	unsigned long pnum;
  	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
  
  	map = alloc_remap(nodeid, size * map_count);
  	if (map) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			map_map[pnum] = map;
  			map += size;
  		}
  		return;
  	}
  
  	size = PAGE_ALIGN(size);
  	map = memblock_virt_alloc_try_nid(size * map_count,
  					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
  					  BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
  	if (map) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			map_map[pnum] = map;
  			map += size;
  		}
  		return;
  	}
  
  	/* fallback */
  	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  		struct mem_section *ms;
  
  		if (!present_section_nr(pnum))
  			continue;
  		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
  		if (map_map[pnum])
  			continue;
  		ms = __nr_to_section(pnum);
  		pr_err("%s: sparsemem memory map backing failed some memory will not be available
  ",
  		       __func__);
  		ms->section_mem_map = 0;
  	}
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  static void __init sparse_early_mem_maps_alloc_node(void *data,
  				 unsigned long pnum_begin,
  				 unsigned long pnum_end,
  				 unsigned long map_count, int nodeid)
  {
  	struct page **map_map = (struct page **)data;
  	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
  					 map_count, nodeid);
  }
  #else
  static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
  {
  	struct page *map;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);
  	map = sparse_mem_map_populate(pnum, nid);
  	if (map)
  		return map;
  	pr_err("%s: sparsemem memory map backing failed some memory will not be available
  ",
  	       __func__);
  	ms->section_mem_map = 0;
  	return NULL;
  }
  #endif

  void __weak __meminit vmemmap_populate_print_last(void)
  {
  }

  /**
 *  alloc_usemap_and_memmap - memory allocation for pageblock flags and vmemmap
 *  @alloc_func: function that allocates the usemap or memmap for a node's sections
 *  @data: usemap_map for pageblock flags or map_map for vmemmap
   */
  static void __init alloc_usemap_and_memmap(void (*alloc_func)
  					(void *, unsigned long, unsigned long,
  					unsigned long, int), void *data)
  {
  	unsigned long pnum;
  	unsigned long map_count;
  	int nodeid_begin = 0;
  	unsigned long pnum_begin = 0;
  	for_each_present_section_nr(0, pnum) {
  		struct mem_section *ms;
  		ms = __nr_to_section(pnum);
  		nodeid_begin = sparse_early_nid(ms);
  		pnum_begin = pnum;
  		break;
  	}
  	map_count = 1;
  	for_each_present_section_nr(pnum_begin + 1, pnum) {
  		struct mem_section *ms;
  		int nodeid;
  		ms = __nr_to_section(pnum);
  		nodeid = sparse_early_nid(ms);
  		if (nodeid == nodeid_begin) {
  			map_count++;
  			continue;
  		}
		/* ok, we need to take care of sections from pnum_begin to pnum - 1 */
  		alloc_func(data, pnum_begin, pnum,
  						map_count, nodeid_begin);
  		/* new start, update count etc*/
  		nodeid_begin = nodeid;
  		pnum_begin = pnum;
  		map_count = 1;
  	}
  	/* ok, last chunk */
  	alloc_func(data, pnum_begin, NR_MEM_SECTIONS,
  						map_count, nodeid_begin);
  }
  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   */
  void __init sparse_init(void)
  {
  	unsigned long pnum;
  	struct page *map;
  	unsigned long *usemap;
  	unsigned long **usemap_map;
  	int size;
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	int size2;
  	struct page **map_map;
  #endif

  	/* see include/linux/mmzone.h 'struct mem_section' definition */
  	BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
  	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
  	set_pageblock_order();
	/*
	 * The mem_map for a node is allocated with big pages (2M on 64 bit
	 * x86) while a usemap is much smaller than a page (roughly 24
	 * bytes), so allocating a 2M map (with 2M alignment) and a tiny
	 * usemap in turn makes each following 2M allocation slip past the
	 * next 2M boundary; on a big system that leaves the memory full of
	 * holes. Instead, allocate all the usemaps up front here so the 2M
	 * pages can be allocated contiguously.
	 *
	 * powerpc needs to call sparse_init_one_section() right after each
	 * sparse_early_mem_map_alloc(), so allocate usemap_map first.
	 */
  	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
  	usemap_map = memblock_virt_alloc(size, 0);
  	if (!usemap_map)
  		panic("can not allocate usemap_map
  ");
  	alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
  							(void *)usemap_map);

  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
  	map_map = memblock_virt_alloc(size2, 0);
  	if (!map_map)
  		panic("can not allocate map_map
  ");
  	alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
  							(void *)map_map);
  #endif
  	for_each_present_section_nr(0, pnum) {
  		usemap = usemap_map[pnum];
  		if (!usemap)
  			continue;
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  		map = map_map[pnum];
  #else
  		map = sparse_early_mem_map_alloc(pnum);
  #endif
  		if (!map)
  			continue;
  		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
  								usemap);
  	}

  	vmemmap_populate_print_last();
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	memblock_free_early(__pa(map_map), size2);
  #endif
  	memblock_free_early(__pa(usemap_map), size);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  
  /* Mark all memory sections within the pfn range as online */
  void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		unsigned long section_nr = pfn_to_section_nr(pfn);
  		struct mem_section *ms;
  
  		/* onlining code should never touch invalid ranges */
  		if (WARN_ON(!valid_section_nr(section_nr)))
  			continue;
  
  		ms = __nr_to_section(section_nr);
  		ms->section_mem_map |= SECTION_IS_ONLINE;
  	}
  }
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
/* Mark all memory sections within the pfn range as offline */
  void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		unsigned long section_nr = pfn_to_section_nr(pfn);
  		struct mem_section *ms;
  
  		/*
  		 * TODO this needs some double checking. Offlining code makes
  		 * sure to check pfn_valid but those checks might be just bogus
  		 */
  		if (WARN_ON(!valid_section_nr(section_nr)))
  			continue;
  
  		ms = __nr_to_section(section_nr);
  		ms->section_mem_map &= ~SECTION_IS_ONLINE;
  	}
  }
  #endif
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
  {
  	/* This will make the necessary allocations eventually. */
  	return sparse_mem_map_populate(pnum, nid);
  }
  static void __kfree_section_memmap(struct page *memmap)
  {
  	unsigned long start = (unsigned long)memmap;
  	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
  
  	vmemmap_free(start, end);
  }
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static void free_map_bootmem(struct page *memmap)
  {
  	unsigned long start = (unsigned long)memmap;
  	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
  
  	vmemmap_free(start, end);
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  #else
  static struct page *__kmalloc_section_memmap(void)
  {
  	struct page *page, *ret;
  	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;

  	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
  	if (page)
  		goto got_map_page;
  
  	ret = vmalloc(memmap_size);
  	if (ret)
  		goto got_map_ptr;
  
  	return NULL;
  got_map_page:
  	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
  got_map_ptr:
  
  	return ret;
  }
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
  {
  	return __kmalloc_section_memmap();
  }
  static void __kfree_section_memmap(struct page *memmap)
  {
  	if (is_vmalloc_addr(memmap))
  		vfree(memmap);
  	else
  		free_pages((unsigned long)memmap,
  			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
  }

  #ifdef CONFIG_MEMORY_HOTREMOVE
  static void free_map_bootmem(struct page *memmap)
  {
  	unsigned long maps_section_nr, removing_section_nr, i;
  	unsigned long magic, nr_pages;
  	struct page *page = virt_to_page(memmap);

  	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
  		>> PAGE_SHIFT;
  	for (i = 0; i < nr_pages; i++, page++) {
  		magic = (unsigned long) page->freelist;
  
  		BUG_ON(magic == NODE_INFO);
  
  		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
  		removing_section_nr = page_private(page);
  
  		/*
		 * When this function is called, the section being removed is
		 * already logically offlined, i.e. all of its pages have been
		 * isolated from the page allocator. If the memmap of the
		 * section being removed is placed in that same section, it
		 * must not be freed here: the page allocator could hand it
		 * out again even though it is about to be removed physically.
  		 */
  		if (maps_section_nr != removing_section_nr)
  			put_page_bootmem(page);
  	}
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  #endif /* CONFIG_SPARSEMEM_VMEMMAP */

  /*
   * returns the number of sections whose mem_maps were properly
   * set.  If this is <=0, then that means that the passed-in
   * map was not consumed and must be freed.
   */
  int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
  {
  	unsigned long section_nr = pfn_to_section_nr(start_pfn);
  	struct mem_section *ms;
  	struct page *memmap;
  	unsigned long *usemap;
  	unsigned long flags;
  	int ret;

  	/*
	 * No locking is needed for this: sparse_index_init() does its own
	 * locking, and it may kmalloc(), so it must not run under the
	 * resize lock taken below.
  	 */
  	ret = sparse_index_init(section_nr, pgdat->node_id);
  	if (ret < 0 && ret != -EEXIST)
  		return ret;
  	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
  	if (!memmap)
  		return -ENOMEM;
  	usemap = __kmalloc_section_usemap();
  	if (!usemap) {
  		__kfree_section_memmap(memmap);
  		return -ENOMEM;
  	}
  
  	pgdat_resize_lock(pgdat, &flags);

  	ms = __pfn_to_section(start_pfn);
  	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
  		ret = -EEXIST;
  		goto out;
  	}

  	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);

  	section_mark_present(ms);

  	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

  out:
  	pgdat_resize_unlock(pgdat, &flags);
  	if (ret <= 0) {
  		kfree(usemap);
  		__kfree_section_memmap(memmap);
  	}
  	return ret;
  }
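
/*
 * Illustrative usage sketch (an assumption, not part of this file): the
 * memory hotplug core adds the backing sections one at a time, roughly:
 *
 *	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
 *	if (ret < 0)
 *		return ret;
 *
 * following the return convention described in the comment above.
 */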

  #ifdef CONFIG_MEMORY_HOTREMOVE
  #ifdef CONFIG_MEMORY_FAILURE
  static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
  {
  	int i;
  
  	if (!memmap)
  		return;
  	for (i = 0; i < nr_pages; i++) {
  		if (PageHWPoison(&memmap[i])) {
  			atomic_long_sub(1, &num_poisoned_pages);
  			ClearPageHWPoison(&memmap[i]);
  		}
  	}
  }
  #else
  static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
  {
  }
  #endif
  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
  {
  	struct page *usemap_page;
  
  	if (!usemap)
  		return;
  
  	usemap_page = virt_to_page(usemap);
  	/*
  	 * Check to see if allocation came from hot-plug-add
  	 */
  	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
  		kfree(usemap);
  		if (memmap)
  			__kfree_section_memmap(memmap);
  		return;
  	}
  
  	/*
	 * The usemap came from bootmem. It is packed together with other
	 * usemaps in the section that holds the pgdat, so just leave it
	 * as it is for now.
  	 */
  	if (memmap)
  		free_map_bootmem(memmap);
  }
  void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
  		unsigned long map_offset)
  {
  	struct page *memmap = NULL;
  	unsigned long *usemap = NULL, flags;
  	struct pglist_data *pgdat = zone->zone_pgdat;

  	pgdat_resize_lock(pgdat, &flags);
  	if (ms->section_mem_map) {
  		usemap = ms->pageblock_flags;
  		memmap = sparse_decode_mem_map(ms->section_mem_map,
  						__section_nr(ms));
  		ms->section_mem_map = 0;
  		ms->pageblock_flags = NULL;
  	}
  	pgdat_resize_unlock(pgdat, &flags);

  	clear_hwpoisoned_pages(memmap + map_offset,
  			PAGES_PER_SECTION - map_offset);
  	free_section_usemap(memmap, usemap);
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  #endif /* CONFIG_MEMORY_HOTPLUG */