mm/sparse.c

  /*
   * sparse memory mappings.
   */
  #include <linux/mm.h>
  #include <linux/slab.h>
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
  #include <linux/highmem.h>
  #include <linux/export.h>
  #include <linux/spinlock.h>
  #include <linux/vmalloc.h>
  #include "internal.h"
  #include <asm/dma.h>
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>
  
  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
  #ifdef CONFIG_SPARSEMEM_EXTREME
  struct mem_section *mem_section[NR_SECTION_ROOTS]
  	____cacheline_internodealigned_in_smp;
  #else
  struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
  	____cacheline_internodealigned_in_smp;
  #endif
  EXPORT_SYMBOL(mem_section);
  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
  int page_to_nid(const struct page *page)
  {
  	return section_to_node_table[page_to_section(page)];
  }
  EXPORT_SYMBOL(page_to_nid);
  
  static void set_section_nid(unsigned long section_nr, int nid)
  {
  	section_to_node_table[section_nr] = nid;
  }
  #else /* !NODE_NOT_IN_PAGE_FLAGS */
  static inline void set_section_nid(unsigned long section_nr, int nid)
  {
  }
  #endif
  #ifdef CONFIG_SPARSEMEM_EXTREME
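  /*
   * Allocate one root's array of SECTIONS_PER_ROOT mem_section structures
   * on node @nid: from the slab once it is available, otherwise from bootmem.
   */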
  static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
  {
  	struct mem_section *section = NULL;
  	unsigned long array_size = SECTIONS_PER_ROOT *
  				   sizeof(struct mem_section);
  	if (slab_is_available()) {
  		if (node_state(nid, N_HIGH_MEMORY))
  			section = kmalloc_node(array_size, GFP_KERNEL, nid);
  		else
  			section = kmalloc(array_size, GFP_KERNEL);
  	} else
  		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
  
  	if (section)
  		memset(section, 0, array_size);
  
  	return section;
  }

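  /*
   * Install the mem_section root covering @section_nr, allocating it on
   * @nid if necessary.  Returns 0 on success, -EEXIST if the root is
   * already set up, or -ENOMEM on allocation failure.
   */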
  static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  {
  	static DEFINE_SPINLOCK(index_init_lock);
  	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  	struct mem_section *section;
  	int ret = 0;
  
  	if (mem_section[root])
  		return -EEXIST;

  	section = sparse_index_alloc(nid);
  	if (!section)
  		return -ENOMEM;
  	/*
  	 * This lock keeps two different sections from
  	 * reallocating for the same index
  	 */
  	spin_lock(&index_init_lock);

  	if (mem_section[root]) {
  		ret = -EEXIST;
  		goto out;
  	}
  
  	mem_section[root] = section;
  out:
  	spin_unlock(&index_init_lock);
  	return ret;
  }
  #else /* !SPARSEMEM_EXTREME */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  	return 0;
  }
  #endif
  /*
   * Although written for the SPARSEMEM_EXTREME case, this happens
   * to also work for the flat array case because
   * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
   */
  int __section_nr(struct mem_section* ms)
  {
  	unsigned long root_nr;
  	struct mem_section* root;
  	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
  		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
  		if (!root)
  			continue;
  
  		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
  		     break;
  	}
  
  	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
  }
  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	return (nid << SECTION_NID_SHIFT);
  }
  
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	return (section->section_mem_map >> SECTION_NID_SHIFT);
  }
  /* Validate the physical addressing limitations of the model */
  void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  						unsigned long *end_pfn)
  {
  	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

  	/*
  	 * Sanity checks - do not allow an architecture to pass
  	 * in larger pfns than the maximum scope of sparsemem:
  	 */
  	if (*start_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu
  ",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*start_pfn = max_sparsemem_pfn;
  		*end_pfn = max_sparsemem_pfn;
  	} else if (*end_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"End of range %lu -> %lu exceeds SPARSEMEM max %lu
  ",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*end_pfn = max_sparsemem_pfn;
  	}
  }
  
  /* Record a memory area against a node. */
  void __init memory_present(int nid, unsigned long start, unsigned long end)
  {
  	unsigned long pfn;

  	start &= PAGE_SECTION_MASK;
  	mminit_validate_memmodel_limits(&start, &end);
  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
  		unsigned long section = pfn_to_section_nr(pfn);
  		struct mem_section *ms;
  
  		sparse_index_init(section, nid);
  		set_section_nid(section, nid);
  
  		ms = __nr_to_section(section);
  		if (!ms->section_mem_map)
  			ms->section_mem_map = sparse_encode_early_nid(nid) |
  							SECTION_MARKED_PRESENT;
  	}
  }
  
  /*
   * Only used by the i386 NUMA architectures, but relatively
   * generic code.
   */
  unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
  						     unsigned long end_pfn)
  {
  	unsigned long pfn;
  	unsigned long nr_pages = 0;
  	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		if (nid != early_pfn_to_nid(pfn))
  			continue;
  		if (pfn_present(pfn))
  			nr_pages += PAGES_PER_SECTION;
  	}
  
  	return nr_pages * sizeof(struct page);
  }
  
  /*
   * Subtle, we encode the real pfn into the mem_map such that
   * the identity pfn - section_mem_map will return the actual
   * physical page frame number.
   */
  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
  {
  	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
  }
  
  /*
   * Decode mem_map from the coded memmap
   */
  struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
  {
  	/* mask off the extra low bits of information */
  	coded_mem_map &= SECTION_MAP_MASK;
  	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
  }
  static int __meminit sparse_init_one_section(struct mem_section *ms,
  		unsigned long pnum, struct page *mem_map,
  		unsigned long *pageblock_bitmap)
  {
  	if (!present_section(ms))
  		return -EINVAL;
  	ms->section_mem_map &= ~SECTION_MAP_MASK;
  	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
  							SECTION_HAS_MEM_MAP;
   	ms->pageblock_flags = pageblock_bitmap;
  
  	return 1;
  }
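  /*
   * Size in bytes of one section's pageblock flags bitmap, rounded up to a
   * whole number of unsigned longs.
   */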
  unsigned long usemap_size(void)
  {
  	unsigned long size_bytes;
  	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
  	size_bytes = roundup(size_bytes, sizeof(unsigned long));
  	return size_bytes;
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  static unsigned long *__kmalloc_section_usemap(void)
  {
  	return kmalloc(usemap_size(), GFP_KERNEL);
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static unsigned long * __init
  sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
  					 unsigned long count)
  {
  	unsigned long section_nr;
  
  	/*
  	 * A page may contain usemaps for other sections preventing the
  	 * page being freed and making a section unremovable while
  	 * other sections referencing the usemap remain active. Similarly,
  	 * a pgdat can prevent a section being removed. If section A
  	 * contains a pgdat and section B contains the usemap, both
  	 * sections become inter-dependent. This allocates usemaps
  	 * from the same section as the pgdat where possible to avoid
  	 * this problem.
  	 */
  	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	return alloc_bootmem_section(usemap_size() * count, section_nr);
  }
  
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  	unsigned long usemap_snr, pgdat_snr;
  	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
  	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int usemap_nid;
  
  	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	if (usemap_snr == pgdat_snr)
  		return;
  
  	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
  		/* skip redundant message */
  		return;
  
  	old_usemap_snr = usemap_snr;
  	old_pgdat_snr = pgdat_snr;
  
  	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
  	if (usemap_nid != nid) {
  		printk(KERN_INFO
  		       "node %d must be removed before remove section %ld
  ",
  		       nid, usemap_snr);
  		return;
  	}
  	/*
  	 * There is a circular dependency.
  	 * Some platforms allow un-removable section because they will just
  	 * gather other removable sections for dynamic partitioning.
  	 * Just notify un-removable section's number here.
  	 */
  	printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr,
  	       pgdat_snr, nid);
  	printk(KERN_CONT
  	       " have a circular dependency on usemap and pgdat allocations
  ");
  }
  #else
  static unsigned long * __init
  sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
  					 unsigned long count)
  {
  	return NULL;
  }
  
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
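  /*
   * Fill usemap_map[] for every present section in [pnum_begin, pnum_end)
   * on node @nodeid.  The usemaps are packed into a single allocation,
   * preferably taken from the section holding the node's pgdat (see
   * sparse_early_usemaps_alloc_pgdat_section above), with ordinary node
   * bootmem as the fallback.
   */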
  static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
  				 unsigned long pnum_begin,
  				 unsigned long pnum_end,
  				 unsigned long usemap_count, int nodeid)
  {
  	void *usemap;
  	unsigned long pnum;
  	int size = usemap_size();

  	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
  								 usemap_count);
  	if (usemap) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			usemap_map[pnum] = usemap;
  			usemap += size;
  		}
  		return;
  	}
  	usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
  	if (usemap) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			usemap_map[pnum] = usemap;
  			usemap += size;
  			check_usemap_section_nr(nodeid, usemap_map[pnum]);
  		}
  		return;
  	}

  	printk(KERN_WARNING "%s: allocation failed\n", __func__);
  }
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
  struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
  {
  	struct page *map;
  	unsigned long size;
  
  	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
  	if (map)
  		return map;
  	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
  	map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
  					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
  	return map;
  }
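  /*
   * Allocate mem_maps for every present section in [pnum_begin, pnum_end)
   * on node @nodeid, preferring one large contiguous allocation (alloc_remap,
   * then bootmem) and falling back to per-section allocation on failure.
   */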
  void __init sparse_mem_maps_populate_node(struct page **map_map,
  					  unsigned long pnum_begin,
  					  unsigned long pnum_end,
  					  unsigned long map_count, int nodeid)
  {
  	void *map;
  	unsigned long pnum;
  	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
  
  	map = alloc_remap(nodeid, size * map_count);
  	if (map) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			map_map[pnum] = map;
  			map += size;
  		}
  		return;
  	}
  
  	size = PAGE_ALIGN(size);
  	map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
  					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
  	if (map) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			map_map[pnum] = map;
  			map += size;
  		}
  		return;
  	}
  
  	/* fallback */
  	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  		struct mem_section *ms;
  
  		if (!present_section_nr(pnum))
  			continue;
  		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
  		if (map_map[pnum])
  			continue;
  		ms = __nr_to_section(pnum);
  		printk(KERN_ERR "%s: sparsemem memory map backing failed "
  			"some memory will not be available.
  ", __func__);
  		ms->section_mem_map = 0;
  	}
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
  				 unsigned long pnum_begin,
  				 unsigned long pnum_end,
  				 unsigned long map_count, int nodeid)
  {
  	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
  					 map_count, nodeid);
  }
  #else
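  /* Allocate the mem_map for one present section on its early-recorded node. */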
  static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
  {
  	struct page *map;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);
  	map = sparse_mem_map_populate(pnum, nid);
  	if (map)
  		return map;
  	printk(KERN_ERR "%s: sparsemem memory map backing failed "
  			"some memory will not be available.
  ", __func__);
  	ms->section_mem_map = 0;
  	return NULL;
  }
  #endif

  void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
  {
  }

  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   */
  void __init sparse_init(void)
  {
  	unsigned long pnum;
  	struct page *map;
  	unsigned long *usemap;
  	unsigned long **usemap_map;
  	int size;
  	int nodeid_begin = 0;
  	unsigned long pnum_begin = 0;
  	unsigned long usemap_count;
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	unsigned long map_count;
  	int size2;
  	struct page **map_map;
  #endif
  
  	/*
  	 * The mem_map is allocated with big pages (2M on 64-bit x86) while a
  	 * usemap is far less than one page (about 24 bytes), so allocating a
  	 * 2M-aligned 2M chunk and then 24 bytes in turn makes the next 2M
  	 * chunk slip one further 2M along; on a big system the memory then
  	 * ends up with a lot of holes...
  	 * Here we try to allocate the 2M pages contiguously instead.
  	 *
  	 * powerpc needs to call sparse_init_one_section right after each
  	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
  	 */
  	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
  	usemap_map = alloc_bootmem(size);
  	if (!usemap_map)
  		panic("can not allocate usemap_map
  ");
  
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;
  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid_begin = sparse_early_nid(ms);
  		pnum_begin = pnum;
  		break;
  	}
  	usemap_count = 1;
  	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;
  		int nodeid;
  
  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid = sparse_early_nid(ms);
  		if (nodeid == nodeid_begin) {
  			usemap_count++;
  			continue;
  		}
  		/* ok, we need to take care of sections from pnum_begin to pnum - 1 */
  		sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
  						 usemap_count, nodeid_begin);
  		/* new start, update count etc*/
  		nodeid_begin = nodeid;
  		pnum_begin = pnum;
  		usemap_count = 1;
  	}
  	/* ok, last chunk */
  	sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
  					 usemap_count, nodeid_begin);

  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
  	map_map = alloc_bootmem(size2);
  	if (!map_map)
  		panic("can not allocate map_map
  ");
  
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;
  
  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid_begin = sparse_early_nid(ms);
  		pnum_begin = pnum;
  		break;
  	}
  	map_count = 1;
  	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;
  		int nodeid;
  
  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid = sparse_early_nid(ms);
  		if (nodeid == nodeid_begin) {
  			map_count++;
  			continue;
  		}
  		/* ok, we need to take care of sections from pnum_begin to pnum - 1 */
  		sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
  						 map_count, nodeid_begin);
  		/* new start, update count etc*/
  		nodeid_begin = nodeid;
  		pnum_begin = pnum;
  		map_count = 1;
  	}
  	/* ok, last chunk */
  	sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
  					 map_count, nodeid_begin);
  #endif
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;

  		usemap = usemap_map[pnum];
  		if (!usemap)
  			continue;
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  		map = map_map[pnum];
  #else
  		map = sparse_early_mem_map_alloc(pnum);
  #endif
  		if (!map)
  			continue;
  		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
  								usemap);
  	}

  	vmemmap_populate_print_last();
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	free_bootmem(__pa(map_map), size2);
  #endif
  	free_bootmem(__pa(usemap_map), size);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						 unsigned long nr_pages)
  {
  	/* This will make the necessary allocations eventually. */
  	return sparse_mem_map_populate(pnum, nid);
  }
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	return; /* XXX: Not implemented yet */
  }
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  }
  #else
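  /*
   * Allocate a zeroed memmap for @nr_pages pages at hotplug time, trying the
   * page allocator first and falling back to vmalloc().
   */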
  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
  {
  	struct page *page, *ret;
  	unsigned long memmap_size = sizeof(struct page) * nr_pages;
  	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
  	if (page)
  		goto got_map_page;
  
  	ret = vmalloc(memmap_size);
  	if (ret)
  		goto got_map_ptr;
  
  	return NULL;
  got_map_page:
  	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
  got_map_ptr:
  	memset(ret, 0, memmap_size);
  
  	return ret;
  }
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						  unsigned long nr_pages)
  {
  	return __kmalloc_section_memmap(nr_pages);
  }
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	if (is_vmalloc_addr(memmap))
  		vfree(memmap);
  	else
  		free_pages((unsigned long)memmap,
  			   get_order(sizeof(struct page) * nr_pages));
  }
  
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  	unsigned long maps_section_nr, removing_section_nr, i;
  	unsigned long magic;
  
  	for (i = 0; i < nr_pages; i++, page++) {
  		magic = (unsigned long) page->lru.next;
  
  		BUG_ON(magic == NODE_INFO);
  
  		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
  		removing_section_nr = page->private;
  
  		/*
  		 * When this function is called, the section being removed is in
  		 * a logically offlined state, meaning all of its pages have been
  		 * isolated from the page allocator. If the memmap of the section
  		 * being removed is placed on that same section, it must not be
  		 * freed; if it were freed, the page allocator could allocate it
  		 * even though it will soon be removed physically.
  		 */
  		if (maps_section_nr != removing_section_nr)
  			put_page_bootmem(page);
  	}
  }
  #endif /* CONFIG_SPARSEMEM_VMEMMAP */

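  /*
   * Free a section's usemap and memmap if they were allocated at hotplug
   * time.  A usemap that came from bootmem is left in place; a bootmem
   * memmap is handed to free_map_bootmem() to be released page by page.
   */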
  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
  {
  	struct page *usemap_page;
  	unsigned long nr_pages;
  	if (!usemap)
  		return;
  	usemap_page = virt_to_page(usemap);
  	/*
  	 * Check to see if allocation came from hot-plug-add
  	 */
  	if (PageSlab(usemap_page)) {
  		kfree(usemap);
  		if (memmap)
  			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
  		return;
  	}
  
  	/*
  	 * The usemap came from bootmem. This is packed with other usemaps
  	 * on the section which has pgdat at boot time. Just keep it as is now.
  	 */
  
  	if (memmap) {
  		struct page *memmap_page;
  		memmap_page = virt_to_page(memmap);
  
  		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
  			>> PAGE_SHIFT;
  
  		free_map_bootmem(memmap_page, nr_pages);
  	}
  }
  /*
   * returns the number of sections whose mem_maps were properly
   * set.  If this is <=0, then that means that the passed-in
   * map was not consumed and must be freed.
   */
  int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
  			   int nr_pages)
  {
  	unsigned long section_nr = pfn_to_section_nr(start_pfn);
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	struct mem_section *ms;
  	struct page *memmap;
  	unsigned long *usemap;
  	unsigned long flags;
  	int ret;

  	/*
  	 * no locking for this, because it does its own
  	 * plus, it does a kmalloc
  	 */
  	ret = sparse_index_init(section_nr, pgdat->node_id);
  	if (ret < 0 && ret != -EEXIST)
  		return ret;
  	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
  	if (!memmap)
  		return -ENOMEM;
  	usemap = __kmalloc_section_usemap();
  	if (!usemap) {
  		__kfree_section_memmap(memmap, nr_pages);
  		return -ENOMEM;
  	}
  
  	pgdat_resize_lock(pgdat, &flags);

  	ms = __pfn_to_section(start_pfn);
  	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
  		ret = -EEXIST;
  		goto out;
  	}

  	ms->section_mem_map |= SECTION_MARKED_PRESENT;
  	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

  out:
  	pgdat_resize_unlock(pgdat, &flags);
  	if (ret <= 0) {
  		kfree(usemap);
  		__kfree_section_memmap(memmap, nr_pages);
  	}
  	return ret;
  }
  
  void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
  {
  	struct page *memmap = NULL;
  	unsigned long *usemap = NULL;
  
  	if (ms->section_mem_map) {
  		usemap = ms->pageblock_flags;
  		memmap = sparse_decode_mem_map(ms->section_mem_map,
  						__section_nr(ms));
  		ms->section_mem_map = 0;
  		ms->pageblock_flags = NULL;
  	}
  
  	free_section_usemap(memmap, usemap);
  }
  #endif