Blame view

mm/sparse.c 16.3 KB
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
1
2
3
  /*
   * sparse memory mappings.
   */
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
4
5
6
  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
0b0acbec1   Dave Hansen   [PATCH] memory ho...
7
  #include <linux/highmem.h>
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
8
  #include <linux/module.h>
28ae55c98   Dave Hansen   [PATCH] sparsemem...
9
  #include <linux/spinlock.h>
0b0acbec1   Dave Hansen   [PATCH] memory ho...
10
  #include <linux/vmalloc.h>
0c0a4a517   Yasunori Goto   memory hotplug: f...
11
  #include "internal.h"
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
12
  #include <asm/dma.h>
8f6aac419   Christoph Lameter   Generic Virtual M...
13
14
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
15
16
17
18
19
20
  
  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
3e347261a   Bob Picco   [PATCH] sparsemem...
21
  #ifdef CONFIG_SPARSEMEM_EXTREME
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
22
  struct mem_section *mem_section[NR_SECTION_ROOTS]
22fc6eccb   Ravikiran G Thirumalai   [PATCH] Change ma...
23
  	____cacheline_internodealigned_in_smp;
3e347261a   Bob Picco   [PATCH] sparsemem...
24
25
  #else
  struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
22fc6eccb   Ravikiran G Thirumalai   [PATCH] Change ma...
26
  	____cacheline_internodealigned_in_smp;
3e347261a   Bob Picco   [PATCH] sparsemem...
27
28
  #endif
  EXPORT_SYMBOL(mem_section);
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
29
30
31
32
33
34
35
36
37
38
39
  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   */
  /* Use a one-byte element when MAX_NUMNODES is at most 256, two bytes otherwise. */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
25ba77c14   Andy Whitcroft   [PATCH] numa node...
40
  int page_to_nid(struct page *page)
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
41
42
43
44
  {
  	return section_to_node_table[page_to_section(page)];
  }
  EXPORT_SYMBOL(page_to_nid);
85770ffe4   Andy Whitcroft   sparsemem: ensure...
45
46
47
48
49
50
51
52
53
  
  /* Record @nid as the node of section @section_nr in section_to_node_table. */
  static void set_section_nid(unsigned long section_nr, int nid)
  {
  	/* The table element is u8 or u16, so @nid is narrowed on store. */
  	section_to_node_table[section_nr] = nid;
  }
  #else /* !NODE_NOT_IN_PAGE_FLAGS */
  /* No-op: without NODE_NOT_IN_PAGE_FLAGS there is no lookup table to fill. */
  static inline void set_section_nid(unsigned long section_nr, int nid)
  {
  }
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
54
  #endif
3e347261a   Bob Picco   [PATCH] sparsemem...
55
  #ifdef CONFIG_SPARSEMEM_EXTREME
577a32f62   Sam Ravnborg   mm: fix section m...
56
  /*
   * Allocate and zero one root's worth of mem_section entries
   * (SECTIONS_PER_ROOT of them) for node @nid.
   *
   * The slab allocator is used once it is available, preferring memory on
   * @nid when that node has N_HIGH_MEMORY state; before that the array
   * comes from bootmem.  Returns NULL if the allocation fails.
   */
  static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
  {
  	struct mem_section *section = NULL;
  	unsigned long array_size = SECTIONS_PER_ROOT *
  				   sizeof(struct mem_section);

  	if (slab_is_available()) {
  		if (node_state(nid, N_HIGH_MEMORY))
  			section = kmalloc_node(array_size, GFP_KERNEL, nid);
  		else
  			section = kmalloc(array_size, GFP_KERNEL);
  	} else
  		section = alloc_bootmem_node(NODE_DATA(nid), array_size);

  	if (section)
  		memset(section, 0, array_size);

  	return section;
  }
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
74

a3142c8e1   Yasunori Goto   Fix section misma...
75
  /*
   * Make sure the root array covering @section_nr exists, allocating it
   * for node @nid if necessary.
   *
   * Returns 0 when a new root was installed, -EEXIST when the root was
   * already present (either before the allocation or because another
   * caller installed it first), and -ENOMEM when the allocation failed.
   */
  static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  {
  	static DEFINE_SPINLOCK(index_init_lock);
  	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  	struct mem_section *section;
  	int ret = 0;

  	if (mem_section[root])
  		return -EEXIST;

  	/* Allocate outside the lock: the allocation may sleep. */
  	section = sparse_index_alloc(nid);
  	if (!section)
  		return -ENOMEM;
  	/*
  	 * This lock keeps two different sections from
  	 * reallocating for the same index
  	 */
  	spin_lock(&index_init_lock);

  	if (mem_section[root])
  		ret = -EEXIST;
  	else
  		mem_section[root] = section;

  	spin_unlock(&index_init_lock);

  	/*
  	 * We lost the race and our array was never published, so release
  	 * it instead of leaking it.  Only the slab-backed case is handled:
  	 * that is the only allocator sparse_index_alloc() uses once
  	 * slab_is_available() is true.
  	 * NOTE(review): a bootmem-backed array is still not freed on this
  	 * path; confirm that path cannot race in practice.
  	 */
  	if (ret && slab_is_available())
  		kfree(section);

  	return ret;
  }
  #else /* !SPARSEMEM_EXTREME */
  /* Nothing to allocate when the section array is static; always succeeds. */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  	return 0;
  }
28ae55c98   Dave Hansen   [PATCH] sparsemem...
109
  #endif
4ca644d97   Dave Hansen   [PATCH] memory ho...
110
111
  /*
   * Although written for the SPARSEMEM_EXTREME case, this happens
cd881a6b2   Andy Whitcroft   sparsemem: clean ...
112
   * to also work for the flat array case because
4ca644d97   Dave Hansen   [PATCH] memory ho...
113
114
115
116
117
118
   * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
   */
  int __section_nr(struct mem_section* ms)
  {
  	unsigned long root_nr;
  	struct mem_section* root;
12783b002   Mike Kravetz   [PATCH] SPARSEMEM...
119
120
  	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
  		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
4ca644d97   Dave Hansen   [PATCH] memory ho...
121
122
123
124
125
126
127
128
129
  		if (!root)
  			continue;
  
  		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
  		     break;
  	}
  
  	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
  }
30c253e6d   Andy Whitcroft   [PATCH] sparsemem...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  /*
   * Encode @nid into the form stored in section_mem_map during early boot.
   * The node id is widened before shifting so the shift is performed in
   * unsigned long rather than in signed int.
   */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	return ((unsigned long)nid << SECTION_NID_SHIFT);
  }
  
  /* Recover the node id that was encoded into @section's section_mem_map. */
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	unsigned long encoded = section->section_mem_map;

  	return (encoded >> SECTION_NID_SHIFT);
  }
2dbb51c49   Mel Gorman   mm: make defensiv...
145
146
147
  /* Validate the physical addressing limitations of the model */
  /*
   * @start_pfn and @end_pfn are in/out.  A range that starts above the
   * SPARSEMEM limit is collapsed to an empty range at the limit; a range
   * that only ends above it is truncated.  Both cases log a warning.
   */
  void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  						unsigned long *end_pfn)
  {
  	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

  	/*
  	 * Sanity checks - do not allow an architecture to pass
  	 * in larger pfns than the maximum scope of sparsemem:
  	 */
  	if (*start_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*start_pfn = max_sparsemem_pfn;
  		*end_pfn = max_sparsemem_pfn;
  	} else if (*end_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*end_pfn = max_sparsemem_pfn;
  	}
  }
  
  /* Record a memory area against a node. */
  /*
   * Mark every section overlapping the pfn range [@start, @end) as present
   * and belonging to node @nid.  @start is rounded down to a section
   * boundary and the range is clamped to the SPARSEMEM limit first.
   */
  void __init memory_present(int nid, unsigned long start, unsigned long end)
  {
  	unsigned long pfn;

  	start &= PAGE_SECTION_MASK;
  	mminit_validate_memmodel_limits(&start, &end);
  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
  		unsigned long section = pfn_to_section_nr(pfn);
  		struct mem_section *ms;

  		/* The return value is deliberately not checked here. */
  		sparse_index_init(section, nid);
  		set_section_nid(section, nid);

  		ms = __nr_to_section(section);
  		/* Only the first caller to claim a section records its node. */
  		if (!ms->section_mem_map)
  			ms->section_mem_map = sparse_encode_early_nid(nid) |
  							SECTION_MARKED_PRESENT;
  	}
  }
  
  /*
   * Only used by the i386 NUMA architectures, but relatively
   * generic code.
   */
  /*
   * Return the number of bytes of struct page needed to cover the present
   * sections of node @nid inside [@start_pfn, @end_pfn), after clamping
   * the range to the SPARSEMEM limit.
   */
  unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
  						     unsigned long end_pfn)
  {
  	unsigned long pfn;
  	unsigned long nr_pages = 0;

  	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		if (nid != early_pfn_to_nid(pfn))
  			continue;

  		if (pfn_present(pfn))
  			nr_pages += PAGES_PER_SECTION;
  	}

  	return nr_pages * sizeof(struct page);
  }
  
  /*
29751f699   Andy Whitcroft   [PATCH] sparsemem...
215
216
217
218
219
220
221
222
223
224
   * Subtle, we encode the real pfn into the mem_map such that
   * the identity pfn - section_mem_map will return the actual
   * physical page frame number.
   */
  /*
   * Bias @mem_map by the first pfn of section @pnum and return the result
   * as an integer suitable for storing in section_mem_map.
   */
  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
  {
  	struct page *biased = mem_map - section_nr_to_pfn(pnum);

  	return (unsigned long)biased;
  }
  
  /*
ea01ea937   Badari Pulavarty   hotplug memory re...
225
   * Decode mem_map from the coded memmap
29751f699   Andy Whitcroft   [PATCH] sparsemem...
226
   */
29751f699   Andy Whitcroft   [PATCH] sparsemem...
227
228
  struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
  {
ea01ea937   Badari Pulavarty   hotplug memory re...
229
230
  	/* mask off the extra low bits of information */
  	coded_mem_map &= SECTION_MAP_MASK;
29751f699   Andy Whitcroft   [PATCH] sparsemem...
231
232
  	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
  }
a3142c8e1   Yasunori Goto   Fix section misma...
233
  /*
   * Attach @mem_map and @pageblock_bitmap to the present section @ms
   * (section number @pnum).
   *
   * Returns 1 on success, or -EINVAL if @ms is not marked present.
   */
  static int __meminit sparse_init_one_section(struct mem_section *ms,
  		unsigned long pnum, struct page *mem_map,
  		unsigned long *pageblock_bitmap)
  {
  	if (!present_section(ms))
  		return -EINVAL;

  	/* Drop the old map bits, keep the flag bits, then store the new map. */
  	ms->section_mem_map &= ~SECTION_MAP_MASK;
  	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
  							SECTION_HAS_MEM_MAP;
  	ms->pageblock_flags = pageblock_bitmap;

  	return 1;
  }
047532787   Yasunori Goto   memory hotplug: r...
246
  unsigned long usemap_size(void)
5c0e30664   Mel Gorman   Fix corruption of...
247
248
249
250
251
252
253
254
255
256
257
258
259
  {
  	unsigned long size_bytes;
  	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
  	size_bytes = roundup(size_bytes, sizeof(unsigned long));
  	return size_bytes;
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  /*
   * Allocate a usemap of usemap_size() bytes with kmalloc(GFP_KERNEL).
   * The memory is not zeroed here; returns NULL on failure.
   */
  static unsigned long *__kmalloc_section_usemap(void)
  {
  	return kmalloc(usemap_size(), GFP_KERNEL);
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
48c906823   Yasunori Goto   memory hotplug: a...
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
   * Try to allocate a usemap from the same section that holds @pgdat.
   * Returns whatever alloc_bootmem_section() returns for that section.
   */
  static unsigned long * __init
  sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
  {
  	unsigned long section_nr;

  	/*
  	 * A page may contain usemaps for other sections preventing the
  	 * page being freed and making a section unremovable while
  	 * other sections referencing the usemap remain active. Similarly,
  	 * a pgdat can prevent a section being removed. If section A
  	 * contains a pgdat and section B contains the usemap, both
  	 * sections become inter-dependent. This allocates usemaps
  	 * from the same section as the pgdat where possible to avoid
  	 * this problem.
  	 */
  	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	return alloc_bootmem_section(usemap_size(), section_nr);
  }
  
  /*
   * Report when @usemap did not land in the same section as the pgdat of
   * node @nid, since that creates a removal-order dependency between the
   * two sections.  Repeated reports for the same pair of sections are
   * suppressed.
   */
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  	unsigned long usemap_snr, pgdat_snr;
  	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
  	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int usemap_nid;

  	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	if (usemap_snr == pgdat_snr)
  		return;

  	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
  		/* skip redundant message */
  		return;

  	old_usemap_snr = usemap_snr;
  	old_pgdat_snr = pgdat_snr;

  	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
  	if (usemap_nid != nid) {
  		/* Section numbers are unsigned long, hence %lu. */
  		printk(KERN_INFO
  		       "node %d must be removed before remove section %lu\n",
  		       nid, usemap_snr);
  		return;
  	}
  	/*
  	 * There is a circular dependency.
  	 * Some platforms allow un-removable section because they will just
  	 * gather other removable sections for dynamic partitioning.
  	 * Just notify un-removable section's number here.
  	 */
  	printk(KERN_INFO "Section %lu and %lu (node %d)", usemap_snr,
  	       pgdat_snr, nid);
  	printk(KERN_CONT
  	       " have a circular dependency on usemap and pgdat allocations\n");
  }
  #else
  /*
   * Stub for !CONFIG_MEMORY_HOTREMOVE: never allocates, always returns NULL
   * so the caller falls back to its own allocation.
   */
  static unsigned long * __init
  sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
  {
  	return NULL;
  }
  
  /* Stub for !CONFIG_MEMORY_HOTREMOVE: nothing to check. */
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
a322f8ab6   Sam Ravnborg   mm: fix section m...
331
  /*
   * Allocate the usemap for present section @pnum at boot.
   *
   * First tries the section holding the node's pgdat; if that yields
   * nothing, falls back to a bootmem allocation on the node and reports
   * any resulting section dependency.  Returns NULL (with a warning) when
   * both attempts fail.
   */
  static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
  {
  	unsigned long *usemap;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);

  	usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
  	if (usemap)
  		return usemap;

  	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
  	if (usemap) {
  		check_usemap_section_nr(nid, usemap);
  		return usemap;
  	}

  	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
  	nid = 0;

  	printk(KERN_WARNING "%s: allocation failed\n", __func__);
  	return NULL;
  }
8f6aac419   Christoph Lameter   Generic Virtual M...
350
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
98f3cfc1d   Yasunori Goto   memory hotplug: H...
351
  /*
   * Allocate the mem_map (PAGES_PER_SECTION struct pages) for section
   * @pnum on node @nid: alloc_remap() first, then page-aligned bootmem.
   * Returns the result of the last attempt, which may be NULL.
   */
  struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
  {
  	struct page *map;

  	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
  	if (map)
  		return map;

  	map = alloc_bootmem_pages_node(NODE_DATA(nid),
  		       PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
  	return map;
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
9e5c6da71   Adrian Bunk   make mm/sparse.c:...
363
  /*
   * Allocate the boot-time mem_map for section @pnum on the node recorded
   * in the section.  On failure the section's section_mem_map is cleared,
   * an error is logged, and NULL is returned.
   */
  static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
  {
  	struct page *map;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);

  	map = sparse_mem_map_populate(pnum, nid);
  	if (map)
  		return map;

  	printk(KERN_ERR "%s: sparsemem memory map backing failed "
  			"some memory will not be available.\n", __func__);
  	ms->section_mem_map = 0;
  	return NULL;
  }
c2b91e2ee   Yinghai Lu   x86_64/mm: check ...
377
378
379
  /* Weak empty default; another definition of this symbol replaces it at link time. */
  void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
  {
  }
193faea92   Stephen Rothwell   Move three functi...
380
381
382
383
384
385
386
387
  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   */
  /*
   * Boot-time setup of every present section: allocate all usemaps first,
   * then allocate each section's mem_map and bind both to the section.
   * Sections whose usemap or mem_map allocation failed are skipped.
   */
  void __init sparse_init(void)
  {
  	unsigned long pnum;
  	struct page *map;
  	unsigned long *usemap;
  	unsigned long **usemap_map;
  	int size;

  	/*
  	 * map is using big page (aka 2M in x86 64 bit)
  	 * usemap is less one page (aka 24 bytes)
  	 * so alloc 2M (with 2M align) and 24 bytes in turn will
  	 * make next 2M slip to one more 2M later.
  	 * then in big system, the memory will have a lot of holes...
  	 * here try to allocate 2M pages continuously.
  	 *
  	 * powerpc need to call sparse_init_one_section right after each
  	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
  	 */
  	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
  	usemap_map = alloc_bootmem(size);
  	if (!usemap_map)
  		panic("can not allocate usemap_map\n");

  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;
  		usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
  	}

  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;

  		usemap = usemap_map[pnum];
  		if (!usemap)
  			continue;

  		map = sparse_early_mem_map_alloc(pnum);
  		if (!map)
  			continue;

  		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
  								usemap);
  	}

  	vmemmap_populate_print_last();

  	/* The temporary pointer table is no longer needed. */
  	free_bootmem(__pa(usemap_map), size);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
98f3cfc1d   Yasunori Goto   memory hotplug: H...
434
435
436
437
438
439
440
441
442
443
444
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  /*
   * VMEMMAP variant: obtain the mem_map for section @pnum on node @nid via
   * sparse_mem_map_populate().  @nr_pages is unused here.
   */
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						 unsigned long nr_pages)
  {
  	/* This will make the necessary allocations eventually. */
  	return sparse_mem_map_populate(pnum, nid);
  }
  /* VMEMMAP variant: freeing is not implemented, so this does nothing. */
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	return; /* XXX: Not implemented yet */
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
445
446
447
  /* VMEMMAP variant: empty; no bootmem mem_map pages are released here. */
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
448
  #else
0b0acbec1   Dave Hansen   [PATCH] memory ho...
449
450
451
452
  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
  {
  	struct page *page, *ret;
  	unsigned long memmap_size = sizeof(struct page) * nr_pages;
f2d0aa5bf   Yasunori Goto   [PATCH] memory ho...
453
  	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
0b0acbec1   Dave Hansen   [PATCH] memory ho...
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
  	if (page)
  		goto got_map_page;
  
  	ret = vmalloc(memmap_size);
  	if (ret)
  		goto got_map_ptr;
  
  	return NULL;
  got_map_page:
  	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
  got_map_ptr:
  	memset(ret, 0, memmap_size);
  
  	return ret;
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
469
470
471
472
473
  /*
   * Non-VMEMMAP variant: @pnum and @nid are unused; the mem_map is sized
   * purely from @nr_pages.
   */
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						  unsigned long nr_pages)
  {
  	return __kmalloc_section_memmap(nr_pages);
  }
0b0acbec1   Dave Hansen   [PATCH] memory ho...
474
475
  /*
   * Release a mem_map obtained from __kmalloc_section_memmap(), choosing
   * vfree() or free_pages() according to where the address lives.
   * @nr_pages must match the value used at allocation so the page order
   * is recomputed correctly.
   */
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	if (is_vmalloc_addr(memmap))
  		vfree(memmap);
  	else
  		free_pages((unsigned long)memmap,
  			   get_order(sizeof(struct page) * nr_pages));
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
  
  /*
   * Walk the @nr_pages pages starting at @page that back a section's
   * mem_map and drop the bootmem reference on each one, except for pages
   * that sit inside the very section being removed.
   */
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  	unsigned long idx;

  	for (idx = 0; idx < nr_pages; idx++) {
  		struct page *cur = page + idx;
  		unsigned long holder_snr, target_snr;
  		int type = atomic_read(&cur->_mapcount);

  		BUG_ON(type == NODE_INFO);

  		holder_snr = pfn_to_section_nr(page_to_pfn(cur));
  		target_snr = cur->private;

  		/*
  		 * By the time we get here the section being removed is
  		 * logically offline, i.e. all of its pages are isolated from
  		 * the page allocator.  A mem_map page living in that same
  		 * section must stay unfreed: otherwise the allocator could
  		 * hand out memory that is about to be physically removed.
  		 */
  		if (holder_snr == target_snr)
  			continue;

  		put_page_bootmem(cur);
  	}
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
508
  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
0b0acbec1   Dave Hansen   [PATCH] memory ho...
509

ea01ea937   Badari Pulavarty   hotplug memory re...
510
511
  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
  {
0c0a4a517   Yasunori Goto   memory hotplug: f...
512
513
  	struct page *usemap_page;
  	unsigned long nr_pages;
ea01ea937   Badari Pulavarty   hotplug memory re...
514
515
  	if (!usemap)
  		return;
0c0a4a517   Yasunori Goto   memory hotplug: f...
516
  	usemap_page = virt_to_page(usemap);
ea01ea937   Badari Pulavarty   hotplug memory re...
517
518
519
  	/*
  	 * Check to see if allocation came from hot-plug-add
  	 */
0c0a4a517   Yasunori Goto   memory hotplug: f...
520
  	if (PageSlab(usemap_page)) {
ea01ea937   Badari Pulavarty   hotplug memory re...
521
522
523
524
525
526
527
  		kfree(usemap);
  		if (memmap)
  			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
  		return;
  	}
  
  	/*
0c0a4a517   Yasunori Goto   memory hotplug: f...
528
529
  	 * The usemap came from bootmem. This is packed with other usemaps
  	 * on the section which has pgdat at boot time. Just keep it as is now.
ea01ea937   Badari Pulavarty   hotplug memory re...
530
  	 */
0c0a4a517   Yasunori Goto   memory hotplug: f...
531
532
533
534
535
536
537
538
539
540
  
  	if (memmap) {
  		struct page *memmap_page;
  		memmap_page = virt_to_page(memmap);
  
  		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
  			>> PAGE_SHIFT;
  
  		free_map_bootmem(memmap_page, nr_pages);
  	}
ea01ea937   Badari Pulavarty   hotplug memory re...
541
  }
29751f699   Andy Whitcroft   [PATCH] sparsemem...
542
  /*
29751f699   Andy Whitcroft   [PATCH] sparsemem...
543
544
545
546
   * returns the number of sections whose mem_maps were properly
   * set.  If this is <=0, then that means that the passed-in
   * map was not consumed and must be freed.
   */
31168481c   Al Viro   meminit section w...
547
  /*
   * Hot-add the section that starts at @start_pfn to @zone's node.
   *
   * Allocates the section's mem_map (for @nr_pages pages) and usemap, then
   * binds them to the section under the pgdat resize lock.  Returns the
   * result of sparse_init_one_section() on success, -EEXIST if the section
   * is already present, or -ENOMEM / another negative errno on failure.
   * When the result is <= 0 both allocations are released here.
   */
  int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
  			   int nr_pages)
  {
  	unsigned long section_nr = pfn_to_section_nr(start_pfn);
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	struct mem_section *ms;
  	struct page *memmap;
  	unsigned long *usemap;
  	unsigned long flags;
  	int ret;

  	/*
  	 * no locking for this, because it does its own
  	 * plus, it does a kmalloc
  	 */
  	ret = sparse_index_init(section_nr, pgdat->node_id);
  	if (ret < 0 && ret != -EEXIST)
  		return ret;
  	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
  	if (!memmap)
  		return -ENOMEM;
  	usemap = __kmalloc_section_usemap();
  	if (!usemap) {
  		__kfree_section_memmap(memmap, nr_pages);
  		return -ENOMEM;
  	}

  	pgdat_resize_lock(pgdat, &flags);

  	ms = __pfn_to_section(start_pfn);
  	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
  		ret = -EEXIST;
  		goto out;
  	}

  	ms->section_mem_map |= SECTION_MARKED_PRESENT;

  	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

  out:
  	pgdat_resize_unlock(pgdat, &flags);
  	/* Free outside the lock; the section did not take ownership. */
  	if (ret <= 0) {
  		kfree(usemap);
  		__kfree_section_memmap(memmap, nr_pages);
  	}
  	return ret;
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
  
  /*
   * Detach the mem_map and usemap from section @ms, clear the section's
   * bookkeeping, and pass both to free_section_usemap().  If the section
   * has no mem_map recorded, NULLs are passed through.
   */
  void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
  {
  	unsigned long *usemap = NULL;
  	struct page *memmap = NULL;
  	unsigned long coded = ms->section_mem_map;

  	if (coded) {
  		usemap = ms->pageblock_flags;
  		memmap = sparse_decode_mem_map(coded, __section_nr(ms));

  		/* Forget both pointers before they are released. */
  		ms->section_mem_map = 0;
  		ms->pageblock_flags = NULL;
  	}

  	free_section_usemap(memmap, usemap);
  }
a3142c8e1   Yasunori Goto   Fix section misma...
609
  #endif