Blame view

mm/sparse.c 20.3 KB
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
1
2
3
  /*
   * sparse memory mappings.
   */
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
4
  #include <linux/mm.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
5
  #include <linux/slab.h>
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
6
7
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
0b0acbec1   Dave Hansen   [PATCH] memory ho...
8
  #include <linux/highmem.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
9
  #include <linux/export.h>
28ae55c98   Dave Hansen   [PATCH] sparsemem...
10
  #include <linux/spinlock.h>
0b0acbec1   Dave Hansen   [PATCH] memory ho...
11
  #include <linux/vmalloc.h>
0c0a4a517   Yasunori Goto   memory hotplug: f...
12
  #include "internal.h"
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
13
  #include <asm/dma.h>
8f6aac419   Christoph Lameter   Generic Virtual M...
14
15
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
16
17
18
19
20
21
  
  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
3e347261a   Bob Picco   [PATCH] sparsemem...
22
  #ifdef CONFIG_SPARSEMEM_EXTREME
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
23
  struct mem_section *mem_section[NR_SECTION_ROOTS]
22fc6eccb   Ravikiran G Thirumalai   [PATCH] Change ma...
24
  	____cacheline_internodealigned_in_smp;
3e347261a   Bob Picco   [PATCH] sparsemem...
25
26
  #else
  struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
22fc6eccb   Ravikiran G Thirumalai   [PATCH] Change ma...
27
  	____cacheline_internodealigned_in_smp;
3e347261a   Bob Picco   [PATCH] sparsemem...
28
29
  #endif
  EXPORT_SYMBOL(mem_section);
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
30
31
32
33
34
35
36
37
38
39
40
  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   *
   * The table is indexed by section number.  Its element type is u8 when
   * MAX_NUMNODES <= 256 and u16 otherwise.  It is written by
   * set_section_nid() and read by page_to_nid().
   */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
33dd4e0ec   Ian Campbell   mm: make some str...
41
  int page_to_nid(const struct page *page)
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
42
43
44
45
  {
  	return section_to_node_table[page_to_section(page)];
  }
  EXPORT_SYMBOL(page_to_nid);
85770ffe4   Andy Whitcroft   sparsemem: ensure...
46
47
48
49
50
51
52
53
54
  
  /* Record @nid as the node of section @section_nr in section_to_node_table. */
  static void set_section_nid(unsigned long section_nr, int nid)
  {
  	section_to_node_table[section_nr] = nid;
  }
  #else /* !NODE_NOT_IN_PAGE_FLAGS */
  /*
   * No-op variant used when NODE_NOT_IN_PAGE_FLAGS is not defined: there is
   * no section_to_node_table to update in this configuration.
   */
  static inline void set_section_nid(unsigned long section_nr, int nid)
  {
  }
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
55
  #endif
3e347261a   Bob Picco   [PATCH] sparsemem...
56
  #ifdef CONFIG_SPARSEMEM_EXTREME
577a32f62   Sam Ravnborg   mm: fix section m...
57
  /*
   * Allocate and zero one root's worth of mem_section entries
   * (SECTIONS_PER_ROOT of them) for node @nid.
   *
   * Once the slab allocator is available the array comes from it: from
   * @nid itself when node_state(nid, N_HIGH_MEMORY) holds, from any node
   * otherwise.  Before that, it comes from bootmem on @nid.
   *
   * Returns the zeroed array, or NULL if the allocation failed.
   */
  static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
  {
  	struct mem_section *section;
  	unsigned long array_size = SECTIONS_PER_ROOT *
  				   sizeof(struct mem_section);

  	if (slab_is_available()) {
  		/* kzalloc*() replaces the open-coded kmalloc*() + memset(). */
  		if (node_state(nid, N_HIGH_MEMORY))
  			return kzalloc_node(array_size, GFP_KERNEL, nid);
  		return kzalloc(array_size, GFP_KERNEL);
  	}

  	section = alloc_bootmem_node(NODE_DATA(nid), array_size);
  	if (section)
  		memset(section, 0, array_size);

  	return section;
  }
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
75

a3142c8e1   Yasunori Goto   Fix section misma...
76
  static int __meminit sparse_index_init(unsigned long section_nr, int nid)
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
77
  {
34af946a2   Ingo Molnar   [PATCH] spin/rwlo...
78
  	static DEFINE_SPINLOCK(index_init_lock);
28ae55c98   Dave Hansen   [PATCH] sparsemem...
79
80
81
  	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  	struct mem_section *section;
  	int ret = 0;
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
82
83
  
  	if (mem_section[root])
28ae55c98   Dave Hansen   [PATCH] sparsemem...
84
  		return -EEXIST;
3e347261a   Bob Picco   [PATCH] sparsemem...
85

28ae55c98   Dave Hansen   [PATCH] sparsemem...
86
  	section = sparse_index_alloc(nid);
af0cd5a7c   WANG Cong   mm/sparse.c: chec...
87
88
  	if (!section)
  		return -ENOMEM;
28ae55c98   Dave Hansen   [PATCH] sparsemem...
89
90
91
92
93
  	/*
  	 * This lock keeps two different sections from
  	 * reallocating for the same index
  	 */
  	spin_lock(&index_init_lock);
3e347261a   Bob Picco   [PATCH] sparsemem...
94

28ae55c98   Dave Hansen   [PATCH] sparsemem...
95
96
97
98
99
100
101
102
103
104
105
106
107
108
  	if (mem_section[root]) {
  		ret = -EEXIST;
  		goto out;
  	}
  
  	mem_section[root] = section;
  out:
  	spin_unlock(&index_init_lock);
  	return ret;
  }
  #else /* !SPARSEMEM_EXTREME */
  /*
   * Without SPARSEMEM_EXTREME mem_section is a static two-dimensional array,
   * so there is no root to allocate; always reports success.
   */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  	return 0;
  }
28ae55c98   Dave Hansen   [PATCH] sparsemem...
110
  #endif
4ca644d97   Dave Hansen   [PATCH] memory ho...
111
112
  /*
   * Although written for the SPARSEMEM_EXTREME case, this happens
cd881a6b2   Andy Whitcroft   sparsemem: clean ...
113
   * to also work for the flat array case because
4ca644d97   Dave Hansen   [PATCH] memory ho...
114
115
116
117
118
119
   * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
   */
  int __section_nr(struct mem_section* ms)
  {
  	unsigned long root_nr;
  	struct mem_section* root;
12783b002   Mike Kravetz   [PATCH] SPARSEMEM...
120
121
  	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
  		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
4ca644d97   Dave Hansen   [PATCH] memory ho...
122
123
124
125
126
127
128
129
130
  		if (!root)
  			continue;
  
  		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
  		     break;
  	}
  
  	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
  }
30c253e6d   Andy Whitcroft   [PATCH] sparsemem...
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  /* Pack @nid into the form stored in section_mem_map during early boot. */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	unsigned long encoded = nid << SECTION_NID_SHIFT;

  	return encoded;
  }
  
  /* Recover the node id that sparse_encode_early_nid() packed into @section. */
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	int nid = section->section_mem_map >> SECTION_NID_SHIFT;

  	return nid;
  }
2dbb51c49   Mel Gorman   mm: make defensiv...
146
147
148
  /*
   * Validate the physical addressing limitations of the model.
   *
   * Clamp the pfn range [*start_pfn, *end_pfn) to the largest pfn that
   * sparsemem can describe, 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT).  If the
   * start lies beyond the limit both ends are set to the limit; if only the
   * end does, only the end is clamped.  Either case logs a warning and
   * triggers WARN_ON_ONCE().
   */
  void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  						unsigned long *end_pfn)
  {
  	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

  	/*
  	 * Sanity checks - do not allow an architecture to pass
  	 * in larger pfns than the maximum scope of sparsemem:
  	 */
  	if (*start_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*start_pfn = max_sparsemem_pfn;
  		*end_pfn = max_sparsemem_pfn;
  	} else if (*end_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*end_pfn = max_sparsemem_pfn;
  	}
  }
  
  /*
   * Record a memory area against a node.
   *
   * @start is rounded down to a section boundary and the range is clamped
   * by mminit_validate_memmodel_limits().  For every section in
   * [start, end) the root is set up, the node is recorded, and - unless
   * section_mem_map is already non-zero - the section is marked present
   * with @nid encoded in section_mem_map.
   */
  void __init memory_present(int nid, unsigned long start, unsigned long end)
  {
  	unsigned long pfn;

  	start &= PAGE_SECTION_MASK;
  	mminit_validate_memmodel_limits(&start, &end);
  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
  		unsigned long section = pfn_to_section_nr(pfn);
  		struct mem_section *ms;

  		/* The return value is not checked here. */
  		sparse_index_init(section, nid);
  		set_section_nid(section, nid);

  		ms = __nr_to_section(section);
  		if (!ms->section_mem_map)
  			ms->section_mem_map = sparse_encode_early_nid(nid) |
  							SECTION_MARKED_PRESENT;
  	}
  }
  
  /*
   * Only used by the i386 NUMA architectures, but relatively
   * generic code.
   *
   * Return the number of bytes of struct page needed to cover the present
   * sections of node @nid within [start_pfn, end_pfn), after the range has
   * been clamped by mminit_validate_memmodel_limits().  The range is walked
   * in PAGES_PER_SECTION steps; a step counts when early_pfn_to_nid()
   * reports @nid and pfn_present() is true.
   */
  unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
  						     unsigned long end_pfn)
  {
  	unsigned long pfn;
  	unsigned long nr_pages = 0;

  	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		if (nid != early_pfn_to_nid(pfn))
  			continue;

  		if (pfn_present(pfn))
  			nr_pages += PAGES_PER_SECTION;
  	}

  	return nr_pages * sizeof(struct page);
  }
  
  /*
   * Subtle, we encode the real pfn into the mem_map such that
   * the identity pfn - section_mem_map will return the actual
   * physical page frame number.
   *
   * Concretely, the value returned is @mem_map moved back by the first pfn
   * of section @pnum (struct page pointer arithmetic), cast to an integer.
   * sparse_decode_mem_map() performs the inverse.
   */
  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
  {
  	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
  }
  
  /*
ea01ea937   Badari Pulavarty   hotplug memory re...
226
   * Decode mem_map from the coded memmap
29751f699   Andy Whitcroft   [PATCH] sparsemem...
227
   */
29751f699   Andy Whitcroft   [PATCH] sparsemem...
228
229
  struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
  {
ea01ea937   Badari Pulavarty   hotplug memory re...
230
231
  	/* mask off the extra low bits of information */
  	coded_mem_map &= SECTION_MAP_MASK;
29751f699   Andy Whitcroft   [PATCH] sparsemem...
232
233
  	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
  }
a3142c8e1   Yasunori Goto   Fix section misma...
234
  /*
   * Attach @mem_map and @pageblock_bitmap to the present section @ms, whose
   * section number is @pnum.
   *
   * The SECTION_MAP_MASK bits of section_mem_map (which may still hold the
   * early node id) are cleared and replaced with the encoded mem_map, and
   * SECTION_HAS_MEM_MAP is set.
   *
   * Returns 1 on success, -EINVAL if @ms is not marked present.
   */
  static int __meminit sparse_init_one_section(struct mem_section *ms,
  		unsigned long pnum, struct page *mem_map,
  		unsigned long *pageblock_bitmap)
  {
  	if (!present_section(ms))
  		return -EINVAL;

  	ms->section_mem_map &= ~SECTION_MAP_MASK;
  	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
  							SECTION_HAS_MEM_MAP;
  	ms->pageblock_flags = pageblock_bitmap;

  	return 1;
  }
047532787   Yasunori Goto   memory hotplug: r...
247
  unsigned long usemap_size(void)
5c0e30664   Mel Gorman   Fix corruption of...
248
249
250
251
252
253
254
255
256
257
258
259
260
  {
  	unsigned long size_bytes;
  	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
  	size_bytes = roundup(size_bytes, sizeof(unsigned long));
  	return size_bytes;
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  /*
   * Allocate one usemap of usemap_size() bytes with kmalloc(GFP_KERNEL).
   * The memory is not zeroed here; returns NULL if kmalloc() fails.
   */
  static unsigned long *__kmalloc_section_usemap(void)
  {
  	return kmalloc(usemap_size(), GFP_KERNEL);
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
48c906823   Yasunori Goto   memory hotplug: a...
261
262
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static unsigned long * __init
a4322e1ba   Yinghai Lu   sparsemem: Put us...
263
264
  sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
  					 unsigned long count)
48c906823   Yasunori Goto   memory hotplug: a...
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  {
  	unsigned long section_nr;
  
  	/*
  	 * A page may contain usemaps for other sections preventing the
  	 * page being freed and making a section unremovable while
  	 * other sections referencing the usemap retmain active. Similarly,
  	 * a pgdat can prevent a section being removed. If section A
  	 * contains a pgdat and section B contains the usemap, both
  	 * sections become inter-dependent. This allocates usemaps
  	 * from the same section as the pgdat where possible to avoid
  	 * this problem.
  	 */
  	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
a4322e1ba   Yinghai Lu   sparsemem: Put us...
279
  	return alloc_bootmem_section(usemap_size() * count, section_nr);
48c906823   Yasunori Goto   memory hotplug: a...
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
  }
  
  /*
   * Report when @usemap was placed in a different section from node @nid's
   * pgdat, since that placement creates a removal-order dependency.
   *
   * Nothing is printed when both live in the same section, or when the
   * (usemap section, pgdat section) pair equals the pair reported by the
   * previous call.
   */
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  	unsigned long usemap_snr, pgdat_snr;
  	/* NR_MEM_SECTIONS is not a valid section number: "nothing reported". */
  	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
  	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int usemap_nid;

  	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	if (usemap_snr == pgdat_snr)
  		return;

  	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
  		/* skip redundant message */
  		return;

  	old_usemap_snr = usemap_snr;
  	old_pgdat_snr = pgdat_snr;

  	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
  	if (usemap_nid != nid) {
  		/* Section numbers are unsigned long, hence %lu. */
  		printk(KERN_INFO
  		       "node %d must be removed before remove section %lu\n",
  		       nid, usemap_snr);
  		return;
  	}
  	/*
  	 * There is a circular dependency.
  	 * Some platforms allow un-removable section because they will just
  	 * gather other removable sections for dynamic partitioning.
  	 * Just notify un-removable section's number here.
  	 */
  	printk(KERN_INFO "Section %lu and %lu (node %d)", usemap_snr,
  	       pgdat_snr, nid);
  	printk(KERN_CONT
  	       " have a circular dependency on usemap and pgdat allocations\n");
  }
  #else
  static unsigned long * __init
a4322e1ba   Yinghai Lu   sparsemem: Put us...
324
325
  sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
  					 unsigned long count)
48c906823   Yasunori Goto   memory hotplug: a...
326
327
328
329
330
331
332
333
  {
  	return NULL;
  }
  
  /* No-op variant used when CONFIG_MEMORY_HOTREMOVE is not set. */
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
a4322e1ba   Yinghai Lu   sparsemem: Put us...
334
335
336
337
  /*
   * Allocate @usemap_count usemaps for node @nodeid in one chunk and hand
   * one slice of usemap_size() bytes to every present section in
   * [pnum_begin, pnum_end), storing the pointers in @usemap_map.
   *
   * The pgdat's own section is tried first; if that yields nothing the
   * chunk comes from alloc_bootmem_node().  If both fail a warning is
   * printed and @usemap_map is not written.
   */
  static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
  				 unsigned long pnum_begin,
  				 unsigned long pnum_end,
  				 unsigned long usemap_count, int nodeid)
  {
  	void *usemap;
  	unsigned long pnum;
  	int size = usemap_size();

  	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
  								 usemap_count);
  	if (!usemap) {
  		usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
  		if (!usemap) {
  			printk(KERN_WARNING "%s: allocation failed\n", __func__);
  			return;
  		}
  	}

  	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;
  		usemap_map[pnum] = usemap;
  		usemap += size;
  		check_usemap_section_nr(nodeid, usemap_map[pnum]);
  	}
  }
8f6aac419   Christoph Lameter   Generic Virtual M...
361
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
98f3cfc1d   Yasunori Goto   memory hotplug: H...
362
  /*
   * Allocate the mem_map (PAGES_PER_SECTION struct pages) for one section
   * on node @nid.  alloc_remap() is tried first; otherwise a page-aligned
   * bootmem allocation is made with __pa(MAX_DMA_ADDRESS) as the goal.
   * @pnum is not used by this variant.
   *
   * Returns the map, or whatever __alloc_bootmem_node_high() returns.
   */
  struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
  {
  	struct page *map;
  	unsigned long size;

  	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
  	if (map)
  		return map;

  	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
  	map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
  					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
  	return map;
  }
9bdac9142   Yinghai Lu   sparsemem: Put me...
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
  void __init sparse_mem_maps_populate_node(struct page **map_map,
  					  unsigned long pnum_begin,
  					  unsigned long pnum_end,
  					  unsigned long map_count, int nodeid)
  {
  	void *map;
  	unsigned long pnum;
  	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
  
  	map = alloc_remap(nodeid, size * map_count);
  	if (map) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			map_map[pnum] = map;
  			map += size;
  		}
  		return;
  	}
  
  	size = PAGE_ALIGN(size);
e48e67e08   Yinghai Lu   sparsemem: on no ...
396
397
  	map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
  					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
9bdac9142   Yinghai Lu   sparsemem: Put me...
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
  	if (map) {
  		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  			if (!present_section_nr(pnum))
  				continue;
  			map_map[pnum] = map;
  			map += size;
  		}
  		return;
  	}
  
  	/* fallback */
  	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
  		struct mem_section *ms;
  
  		if (!present_section_nr(pnum))
  			continue;
  		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
  		if (map_map[pnum])
  			continue;
  		ms = __nr_to_section(pnum);
  		printk(KERN_ERR "%s: sparsemem memory map backing failed "
  			"some memory will not be available.
  ", __func__);
  		ms->section_mem_map = 0;
  	}
  }
8f6aac419   Christoph Lameter   Generic Virtual M...
424
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
81d0d950e   Yinghai Lu   sparsemem: Fix co...
425
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
9bdac9142   Yinghai Lu   sparsemem: Put me...
426
427
428
429
430
431
432
433
  /*
   * Thin wrapper: forwards all arguments unchanged to
   * sparse_mem_maps_populate_node().
   */
  static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
  				 unsigned long pnum_begin,
  				 unsigned long pnum_end,
  				 unsigned long map_count, int nodeid)
  {
  	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
  					 map_count, nodeid);
  }
81d0d950e   Yinghai Lu   sparsemem: Fix co...
434
  #else
9e5c6da71   Adrian Bunk   make mm/sparse.c:...
435
  /*
   * Allocate the mem_map for section @pnum on the node recorded in its
   * section_mem_map during early boot.
   *
   * Returns the map on success.  On failure an error is printed, the
   * section's section_mem_map is cleared to 0, and NULL is returned.
   */
  static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
  {
  	struct page *map;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);

  	map = sparse_mem_map_populate(pnum, nid);
  	if (map)
  		return map;

  	printk(KERN_ERR "%s: sparsemem memory map backing failed "
  			"some memory will not be available.\n", __func__);
  	ms->section_mem_map = 0;
  	return NULL;
  }
9bdac9142   Yinghai Lu   sparsemem: Put me...
449
  #endif
29751f699   Andy Whitcroft   [PATCH] sparsemem...
450

c2b91e2ee   Yinghai Lu   x86_64/mm: check ...
451
452
453
  /*
   * Weak default that does nothing; being weak, it can be replaced by
   * another definition at link time.  Called at the end of sparse_init().
   */
  void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
  {
  }
a4322e1ba   Yinghai Lu   sparsemem: Put us...
454

193faea92   Stephen Rothwell   Move three functi...
455
456
457
458
459
460
461
462
  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   *
   * Present sections are walked in section-number order and grouped into
   * runs that share a node id; usemaps (and, with
   * CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER, mem_maps) are allocated one
   * run at a time.  A final pass wires each section's usemap and mem_map
   * in with sparse_init_one_section(); sections missing either are
   * skipped.  The temporary pointer tables are freed before returning.
   */
  void __init sparse_init(void)
  {
  	unsigned long pnum;
  	struct page *map;
  	unsigned long *usemap;
  	unsigned long **usemap_map;
  	int size;
  	int nodeid_begin = 0;
  	unsigned long pnum_begin = 0;
  	unsigned long usemap_count;
  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	unsigned long map_count;
  	int size2;
  	struct page **map_map;
  #endif

  	/*
  	 * map is using big page (aka 2M in x86 64 bit)
  	 * usemap is less one page (aka 24 bytes)
  	 * so alloc 2M (with 2M align) and 24 bytes in turn will
  	 * make next 2M slip to one more 2M later.
  	 * then in big system, the memory will have a lot of holes...
  	 * here try to allocate 2M pages continuously.
  	 *
  	 * powerpc need to call sparse_init_one_section right after each
  	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
  	 */
  	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
  	usemap_map = alloc_bootmem(size);
  	if (!usemap_map)
  		panic("can not allocate usemap_map\n");

  	/* Find the first present section; it opens the first run. */
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;

  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid_begin = sparse_early_nid(ms);
  		pnum_begin = pnum;
  		break;
  	}
  	usemap_count = 1;
  	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;
  		int nodeid;

  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid = sparse_early_nid(ms);
  		if (nodeid == nodeid_begin) {
  			usemap_count++;
  			continue;
  		}
  		/* ok, we need to take care of from pnum_begin to pnum - 1 */
  		sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
  						 usemap_count, nodeid_begin);
  		/* new start, update count etc */
  		nodeid_begin = nodeid;
  		pnum_begin = pnum;
  		usemap_count = 1;
  	}
  	/* ok, last chunk */
  	sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
  					 usemap_count, nodeid_begin);

  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
  	map_map = alloc_bootmem(size2);
  	if (!map_map)
  		panic("can not allocate map_map\n");

  	/* Same run detection as above, this time for the mem_maps. */
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;

  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid_begin = sparse_early_nid(ms);
  		pnum_begin = pnum;
  		break;
  	}
  	map_count = 1;
  	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
  		struct mem_section *ms;
  		int nodeid;

  		if (!present_section_nr(pnum))
  			continue;
  		ms = __nr_to_section(pnum);
  		nodeid = sparse_early_nid(ms);
  		if (nodeid == nodeid_begin) {
  			map_count++;
  			continue;
  		}
  		/* ok, we need to take care of from pnum_begin to pnum - 1 */
  		sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
  						 map_count, nodeid_begin);
  		/* new start, update count etc */
  		nodeid_begin = nodeid;
  		pnum_begin = pnum;
  		map_count = 1;
  	}
  	/* ok, last chunk */
  	sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
  					 map_count, nodeid_begin);
  #endif

  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;

  		usemap = usemap_map[pnum];
  		if (!usemap)
  			continue;

  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  		map = map_map[pnum];
  #else
  		map = sparse_early_mem_map_alloc(pnum);
  #endif
  		if (!map)
  			continue;

  		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
  								usemap);
  	}

  	vmemmap_populate_print_last();

  #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
  	free_bootmem(__pa(map_map), size2);
  #endif
  	free_bootmem(__pa(usemap_map), size);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
98f3cfc1d   Yasunori Goto   memory hotplug: H...
594
595
596
597
598
599
600
601
602
603
604
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  /*
   * CONFIG_SPARSEMEM_VMEMMAP variant: obtain the mem_map for section @pnum
   * on node @nid through sparse_mem_map_populate().  @nr_pages is unused.
   */
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						 unsigned long nr_pages)
  {
  	/* This will make the necessary allocations eventually. */
  	return sparse_mem_map_populate(pnum, nid);
  }
  /*
   * CONFIG_SPARSEMEM_VMEMMAP variant: freeing is not implemented, so this
   * returns without touching @memmap.
   */
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	return; /* XXX: Not implemented yet */
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
605
606
607
  /* CONFIG_SPARSEMEM_VMEMMAP variant: intentionally empty. */
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
608
  #else
0b0acbec1   Dave Hansen   [PATCH] memory ho...
609
610
611
612
  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
  {
  	struct page *page, *ret;
  	unsigned long memmap_size = sizeof(struct page) * nr_pages;
f2d0aa5bf   Yasunori Goto   [PATCH] memory ho...
613
  	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
0b0acbec1   Dave Hansen   [PATCH] memory ho...
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
  	if (page)
  		goto got_map_page;
  
  	ret = vmalloc(memmap_size);
  	if (ret)
  		goto got_map_ptr;
  
  	return NULL;
  got_map_page:
  	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
  got_map_ptr:
  	memset(ret, 0, memmap_size);
  
  	return ret;
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
629
630
631
632
633
  /*
   * Non-vmemmap variant: @pnum and @nid are unused; the allocation is
   * delegated to __kmalloc_section_memmap(@nr_pages).
   */
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						  unsigned long nr_pages)
  {
  	return __kmalloc_section_memmap(nr_pages);
  }
0b0acbec1   Dave Hansen   [PATCH] memory ho...
634
635
  /*
   * Free a mem_map of @nr_pages struct pages that came from
   * __kmalloc_section_memmap(): vfree() for a vmalloc address, otherwise
   * free_pages() with the same order that was used to allocate it.
   */
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	if (is_vmalloc_addr(memmap))
  		vfree(memmap);
  	else
  		free_pages((unsigned long)memmap,
  			   get_order(sizeof(struct page) * nr_pages));
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
642
643
644
645
  
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  	unsigned long maps_section_nr, removing_section_nr, i;
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
646
  	unsigned long magic;
0c0a4a517   Yasunori Goto   memory hotplug: f...
647
648
  
  	for (i = 0; i < nr_pages; i++, page++) {
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
649
  		magic = (unsigned long) page->lru.next;
0c0a4a517   Yasunori Goto   memory hotplug: f...
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
  
  		BUG_ON(magic == NODE_INFO);
  
  		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
  		removing_section_nr = page->private;
  
  		/*
  		 * When this function is called, the removing section is
  		 * logical offlined state. This means all pages are isolated
  		 * from page allocator. If removing section's memmap is placed
  		 * on the same section, it must not be freed.
  		 * If it is freed, page allocator may allocate it which will
  		 * be removed physically soon.
  		 */
  		if (maps_section_nr != removing_section_nr)
  			put_page_bootmem(page);
  	}
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
668
  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
0b0acbec1   Dave Hansen   [PATCH] memory ho...
669

ea01ea937   Badari Pulavarty   hotplug memory re...
670
671
  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
  {
0c0a4a517   Yasunori Goto   memory hotplug: f...
672
673
  	struct page *usemap_page;
  	unsigned long nr_pages;
ea01ea937   Badari Pulavarty   hotplug memory re...
674
675
  	if (!usemap)
  		return;
0c0a4a517   Yasunori Goto   memory hotplug: f...
676
  	usemap_page = virt_to_page(usemap);
ea01ea937   Badari Pulavarty   hotplug memory re...
677
678
679
  	/*
  	 * Check to see if allocation came from hot-plug-add
  	 */
0c0a4a517   Yasunori Goto   memory hotplug: f...
680
  	if (PageSlab(usemap_page)) {
ea01ea937   Badari Pulavarty   hotplug memory re...
681
682
683
684
685
686
687
  		kfree(usemap);
  		if (memmap)
  			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
  		return;
  	}
  
  	/*
0c0a4a517   Yasunori Goto   memory hotplug: f...
688
689
  	 * The usemap came from bootmem. This is packed with other usemaps
  	 * on the section which has pgdat at boot time. Just keep it as is now.
ea01ea937   Badari Pulavarty   hotplug memory re...
690
  	 */
0c0a4a517   Yasunori Goto   memory hotplug: f...
691
692
693
694
695
696
697
698
699
700
  
  	if (memmap) {
  		struct page *memmap_page;
  		memmap_page = virt_to_page(memmap);
  
  		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
  			>> PAGE_SHIFT;
  
  		free_map_bootmem(memmap_page, nr_pages);
  	}
ea01ea937   Badari Pulavarty   hotplug memory re...
701
  }
29751f699   Andy Whitcroft   [PATCH] sparsemem...
702
  /*
29751f699   Andy Whitcroft   [PATCH] sparsemem...
703
704
705
706
   * Returns the number of sections whose mem_maps were properly set.
   * A return value <= 0 means the section was not set up; any memmap
   * and usemap allocated here have been freed before returning.
   */
31168481c   Al Viro   meminit section w...
707
  int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
  			   int nr_pages)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long section_nr = pfn_to_section_nr(start_pfn);
  	unsigned long *usemap;
  	struct page *memmap;
  	struct mem_section *ms;
  	unsigned long flags;
  	int ret;

  	/*
  	 * Called without the resize lock held: sparse_index_init() does
  	 * its own locking and, in addition, does a kmalloc.
  	 * An already existing index (-EEXIST) is not treated as an error.
  	 */
  	ret = sparse_index_init(section_nr, pgdat->node_id);
  	if (ret < 0 && ret != -EEXIST)
  		return ret;

  	/* Allocate both maps up front, before taking the resize lock. */
  	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
  	if (!memmap)
  		return -ENOMEM;

  	usemap = __kmalloc_section_usemap();
  	if (!usemap) {
  		__kfree_section_memmap(memmap, nr_pages);
  		return -ENOMEM;
  	}

  	pgdat_resize_lock(pgdat, &flags);

  	ms = __pfn_to_section(start_pfn);
  	if (!(ms->section_mem_map & SECTION_MARKED_PRESENT)) {
  		/* Mark the section present, then attach the new maps. */
  		ms->section_mem_map |= SECTION_MARKED_PRESENT;
  		ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
  	} else {
  		/* Section is already marked present. */
  		ret = -EEXIST;
  	}

  	pgdat_resize_unlock(pgdat, &flags);

  	if (ret > 0)
  		return ret;

  	/* The maps were not installed: release them after dropping the lock. */
  	kfree(usemap);
  	__kfree_section_memmap(memmap, nr_pages);
  	return ret;
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
  
  void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
  {
  	unsigned long *old_usemap = NULL;
  	struct page *old_memmap = NULL;

  	/*
  	 * If the section has a mem_map recorded, take over its memmap and
  	 * usemap and clear both fields in the section before freeing them.
  	 */
  	if (ms->section_mem_map) {
  		old_memmap = sparse_decode_mem_map(ms->section_mem_map,
  						   __section_nr(ms));
  		old_usemap = ms->pageblock_flags;

  		ms->pageblock_flags = NULL;
  		ms->section_mem_map = 0;
  	}

  	/* Called even when both pointers are NULL. */
  	free_section_usemap(old_memmap, old_usemap);
  }
a3142c8e1   Yasunori Goto   Fix section misma...
769
  #endif