Blame view

mm/sparse.c 16.2 KB
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
1
2
3
  /*
   * sparse memory mappings.
   */
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
4
5
6
  #include <linux/mm.h>
  #include <linux/mmzone.h>
  #include <linux/bootmem.h>
0b0acbec1   Dave Hansen   [PATCH] memory ho...
7
  #include <linux/highmem.h>
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
8
  #include <linux/module.h>
28ae55c98   Dave Hansen   [PATCH] sparsemem...
9
  #include <linux/spinlock.h>
0b0acbec1   Dave Hansen   [PATCH] memory ho...
10
  #include <linux/vmalloc.h>
0c0a4a517   Yasunori Goto   memory hotplug: f...
11
  #include "internal.h"
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
12
  #include <asm/dma.h>
8f6aac419   Christoph Lameter   Generic Virtual M...
13
14
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>
d41dee369   Andy Whitcroft   [PATCH] sparsemem...
15
16
17
18
19
20
  
  /*
   * Permanent SPARSEMEM data:
   *
   * 1) mem_section	- memory sections, mem_map's for valid memory
   */
3e347261a   Bob Picco   [PATCH] sparsemem...
21
  #ifdef CONFIG_SPARSEMEM_EXTREME
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
22
  struct mem_section *mem_section[NR_SECTION_ROOTS]
22fc6eccb   Ravikiran G Thirumalai   [PATCH] Change ma...
23
  	____cacheline_internodealigned_in_smp;
3e347261a   Bob Picco   [PATCH] sparsemem...
24
25
  #else
  struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
22fc6eccb   Ravikiran G Thirumalai   [PATCH] Change ma...
26
  	____cacheline_internodealigned_in_smp;
3e347261a   Bob Picco   [PATCH] sparsemem...
27
28
  #endif
  EXPORT_SYMBOL(mem_section);
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
29
30
31
32
33
34
35
36
37
38
39
  #ifdef NODE_NOT_IN_PAGE_FLAGS
  /*
   * If we did not store the node number in the page then we have to
   * do a lookup in the section_to_node_table in order to find which
   * node the page belongs to.
   */
  #if MAX_NUMNODES <= 256
  static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #else
  static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  #endif
25ba77c14   Andy Whitcroft   [PATCH] numa node...
40
  int page_to_nid(struct page *page)
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
41
42
43
44
  {
  	return section_to_node_table[page_to_section(page)];
  }
  EXPORT_SYMBOL(page_to_nid);
85770ffe4   Andy Whitcroft   sparsemem: ensure...
45
46
47
48
49
50
51
52
53
  
  /*
   * Remember @nid as the node of section @section_nr so that page_to_nid()
   * can find it through section_to_node_table.
   */
  static void set_section_nid(unsigned long section_nr, int nid)
  {
  	section_to_node_table[section_nr] = nid;
  }
  #else /* !NODE_NOT_IN_PAGE_FLAGS */
  /* No section_to_node_table exists in this configuration: nothing to record. */
  static inline void set_section_nid(unsigned long section_nr, int nid)
  {
  }
89689ae7f   Christoph Lameter   [PATCH] Get rid o...
54
  #endif
3e347261a   Bob Picco   [PATCH] sparsemem...
55
  #ifdef CONFIG_SPARSEMEM_EXTREME
577a32f62   Sam Ravnborg   mm: fix section m...
56
  /*
   * Allocate a zeroed array of SECTIONS_PER_ROOT mem_section entries for
   * node @nid.  The slab allocator is used once it is available, bootmem
   * otherwise.  Returns NULL when the allocation fails.
   */
  static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
  {
  	unsigned long bytes = SECTIONS_PER_ROOT * sizeof(struct mem_section);
  	struct mem_section *root_array;

  	if (slab_is_available())
  		root_array = kmalloc_node(bytes, GFP_KERNEL, nid);
  	else
  		root_array = alloc_bootmem_node(NODE_DATA(nid), bytes);

  	if (!root_array)
  		return NULL;

  	memset(root_array, 0, bytes);
  	return root_array;
  }
802f192e4   Bob Picco   [PATCH] SPARSEMEM...
71

a3142c8e1   Yasunori Goto   Fix section misma...
72
  /*
   * Make sure the root array that holds section @section_nr exists,
   * allocating it for node @nid when it does not.
   *
   * Returns 0 when a new root array was installed, -EEXIST when the root
   * was already populated (callers in this file treat that as success) and
   * -ENOMEM when the allocation failed.
   */
  static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  {
  	static DEFINE_SPINLOCK(index_init_lock);
  	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  	struct mem_section *section;
  	int from_slab;
  	int ret = 0;

  	if (mem_section[root])
  		return -EEXIST;

  	/*
  	 * Sample the allocator state before allocating so that we know
  	 * how to give the memory back if we lose the race below.
  	 */
  	from_slab = slab_is_available();
  	section = sparse_index_alloc(nid);
  	if (!section)
  		return -ENOMEM;
  	/*
  	 * This lock keeps two different sections from
  	 * reallocating for the same index
  	 */
  	spin_lock(&index_init_lock);

  	if (mem_section[root]) {
  		ret = -EEXIST;
  		goto out;
  	}

  	mem_section[root] = section;
  out:
  	spin_unlock(&index_init_lock);

  	/*
  	 * Somebody else installed the root between the unlocked check and
  	 * the locked one: our array was never published, so release it
  	 * instead of leaking it.  Only the slab case is handled here.
  	 * NOTE(review): a bootmem-backed array is still left allocated in
  	 * that case; presumably the race cannot happen that early in boot
  	 * - confirm.
  	 */
  	if (ret && from_slab)
  		kfree(section);

  	return ret;
  }
  #else /* !SPARSEMEM_EXTREME */
  /*
   * Without SPARSEMEM_EXTREME mem_section is a statically sized array, so
   * there is no root to allocate: always report success.
   */
  static inline int sparse_index_init(unsigned long section_nr, int nid)
  {
  	return 0;
  }
28ae55c98   Dave Hansen   [PATCH] sparsemem...
106
  #endif
4ca644d97   Dave Hansen   [PATCH] memory ho...
107
108
  /*
   * Although written for the SPARSEMEM_EXTREME case, this happens
cd881a6b2   Andy Whitcroft   sparsemem: clean ...
109
   * to also work for the flat array case because
4ca644d97   Dave Hansen   [PATCH] memory ho...
110
111
112
113
114
115
   * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
   */
  int __section_nr(struct mem_section* ms)
  {
  	unsigned long root_nr;
  	struct mem_section* root;
12783b002   Mike Kravetz   [PATCH] SPARSEMEM...
116
117
  	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
  		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
4ca644d97   Dave Hansen   [PATCH] memory ho...
118
119
120
121
122
123
124
125
126
  		if (!root)
  			continue;
  
  		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
  		     break;
  	}
  
  	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
  }
30c253e6d   Andy Whitcroft   [PATCH] sparsemem...
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
  /*
   * During early boot, before section_mem_map is used for an actual
   * mem_map, we use section_mem_map to store the section's NUMA
   * node.  This keeps us from having to use another data structure.  The
   * node information is cleared just before we store the real mem_map.
   */
  /* Shift @nid into the position used for the early-boot node encoding. */
  static inline unsigned long sparse_encode_early_nid(int nid)
  {
  	return (nid << SECTION_NID_SHIFT);
  }
  
  /*
   * Recover the node id stored by sparse_encode_early_nid() from
   * @section->section_mem_map; the bits below SECTION_NID_SHIFT are dropped.
   */
  static inline int sparse_early_nid(struct mem_section *section)
  {
  	return (section->section_mem_map >> SECTION_NID_SHIFT);
  }
2dbb51c49   Mel Gorman   mm: make defensiv...
142
143
144
  /*
   * Validate the physical addressing limitations of the model.
   *
   * @start_pfn and @end_pfn are clamped in place to
   * 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT).  A range that starts beyond
   * that limit collapses to an empty range at the limit.  Every clamp is
   * reported through mminit_dprintk() and WARN_ON_ONCE().
   */
  void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
  						unsigned long *end_pfn)
  {
  	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

  	/*
  	 * Sanity checks - do not allow an architecture to pass
  	 * in larger pfns than the maximum scope of sparsemem:
  	 */
  	if (*start_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*start_pfn = max_sparsemem_pfn;
  		*end_pfn = max_sparsemem_pfn;
  	}

  	if (*end_pfn > max_sparsemem_pfn) {
  		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
  			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
  			*start_pfn, *end_pfn, max_sparsemem_pfn);
  		WARN_ON_ONCE(1);
  		*end_pfn = max_sparsemem_pfn;
  	}
  }
  
  /*
   * Record a memory area against a node.
   *
   * Walks [start, end) one section at a time after masking @start with
   * PAGE_SECTION_MASK and clamping the range to the model limits.  Each
   * section gets its root set up and its node recorded; a section whose
   * section_mem_map is still zero is marked present with @nid encoded in it.
   */
  void __init memory_present(int nid, unsigned long start, unsigned long end)
  {
  	unsigned long pfn;

  	start &= PAGE_SECTION_MASK;
  	mminit_validate_memmodel_limits(&start, &end);

  	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
  		unsigned long snr = pfn_to_section_nr(pfn);
  		struct mem_section *ms;

  		sparse_index_init(snr, nid);
  		set_section_nid(snr, nid);

  		ms = __nr_to_section(snr);
  		/* an already initialised section keeps its contents */
  		if (ms->section_mem_map)
  			continue;

  		ms->section_mem_map = sparse_encode_early_nid(nid) |
  						SECTION_MARKED_PRESENT;
  	}
  }
  
  /*
   * Only used by the i386 NUMA architectures, but relatively
   * generic code.
   */
  /*
   * Return the number of bytes of struct page needed to cover the present
   * sections of node @nid inside [start_pfn, end_pfn), after the range has
   * been clamped to the model limits.
   */
  unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
  						     unsigned long end_pfn)
  {
  	unsigned long nr_pages = 0;
  	unsigned long pfn;

  	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);

  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		/* only sections that belong to @nid and are present count */
  		if (nid == early_pfn_to_nid(pfn) && pfn_present(pfn))
  			nr_pages += PAGES_PER_SECTION;
  	}

  	return nr_pages * sizeof(struct page);
  }
  
  /*
29751f699   Andy Whitcroft   [PATCH] sparsemem...
214
215
216
217
218
219
220
221
222
223
   * Subtle, we encode the real pfn into the mem_map such that
   * the identity pfn - section_mem_map will return the actual
   * physical page frame number.
   */
  /*
   * Encode @mem_map for section @pnum: the section's first pfn is subtracted
   * (in units of struct page) and the resulting pointer is returned as an
   * unsigned long.  sparse_decode_mem_map() performs the reverse step.
   */
  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
  {
  	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
  }
  
  /*
ea01ea937   Badari Pulavarty   hotplug memory re...
224
   * Decode mem_map from the coded memmap
29751f699   Andy Whitcroft   [PATCH] sparsemem...
225
   */
29751f699   Andy Whitcroft   [PATCH] sparsemem...
226
227
  struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
  {
ea01ea937   Badari Pulavarty   hotplug memory re...
228
229
  	/* mask off the extra low bits of information */
  	coded_mem_map &= SECTION_MAP_MASK;
29751f699   Andy Whitcroft   [PATCH] sparsemem...
230
231
  	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
  }
a3142c8e1   Yasunori Goto   Fix section misma...
232
  /*
   * Attach @mem_map and @pageblock_bitmap to section @pnum.
   *
   * Returns -EINVAL when @ms is not a present section, 1 otherwise.  The
   * bits outside SECTION_MAP_MASK that are already set in section_mem_map
   * are kept; SECTION_HAS_MEM_MAP is set in addition.
   */
  static int __meminit sparse_init_one_section(struct mem_section *ms,
  		unsigned long pnum, struct page *mem_map,
  		unsigned long *pageblock_bitmap)
  {
  	unsigned long coded_map;

  	if (!present_section(ms))
  		return -EINVAL;

  	coded_map = sparse_encode_mem_map(mem_map, pnum) | SECTION_HAS_MEM_MAP;

  	ms->section_mem_map &= ~SECTION_MAP_MASK;
  	ms->section_mem_map |= coded_map;
  	ms->pageblock_flags = pageblock_bitmap;

  	return 1;
  }
047532787   Yasunori Goto   memory hotplug: r...
245
  unsigned long usemap_size(void)
5c0e30664   Mel Gorman   Fix corruption of...
246
247
248
249
250
251
252
253
254
255
256
257
258
  {
  	unsigned long size_bytes;
  	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
  	size_bytes = roundup(size_bytes, sizeof(unsigned long));
  	return size_bytes;
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
  /*
   * Allocate one usemap of usemap_size() bytes with kmalloc().  The memory
   * is not cleared here; NULL is returned when kmalloc() fails.
   */
  static unsigned long *__kmalloc_section_usemap(void)
  {
  	return kmalloc(usemap_size(), GFP_KERNEL);
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
48c906823   Yasunori Goto   memory hotplug: a...
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
   * Try to allocate a usemap from the section that holds @pgdat.  Returns
   * whatever alloc_bootmem_section() returns for that section.
   */
  static unsigned long * __init
  sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
  {
  	unsigned long pgdat_snr;

  	/*
  	 * A page may hold the usemaps of several sections.  Such a page
  	 * cannot be freed, and its section cannot be removed, while any
  	 * of those other sections is still active.  A pgdat pins its
  	 * section in the same way.  With the pgdat in section A and the
  	 * usemap in section B the two sections would depend on each
  	 * other, so the usemap is taken from the pgdat's own section
  	 * where possible.
  	 */
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);

  	return alloc_bootmem_section(usemap_size(), pgdat_snr);
  }
  
  /*
   * Report when the usemap of node @nid was placed in a different section
   * than the node's pgdat, since that creates a removal-order dependency.
   *
   * Nothing is printed when both live in the same section, or when the
   * (usemap section, pgdat section) pair equals the pair reported by the
   * previous call.
   */
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  	unsigned long usemap_snr, pgdat_snr;
  	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
  	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int usemap_nid;

  	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
  	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
  	if (usemap_snr == pgdat_snr)
  		return;

  	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
  		/* skip redundant message */
  		return;

  	old_usemap_snr = usemap_snr;
  	old_pgdat_snr = pgdat_snr;

  	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
  	if (usemap_nid != nid) {
  		/* the usemap sits in a section owned by another node */
  		printk(KERN_INFO
  		       "node %d must be removed before remove section %lu\n",
  		       nid, usemap_snr);
  		return;
  	}
  	/*
  	 * There is a circular dependency.
  	 * Some platforms allow un-removable section because they will just
  	 * gather other removable sections for dynamic partitioning.
  	 * Just notify un-removable section's number here.
  	 */
  	printk(KERN_INFO "Section %lu and %lu (node %d)", usemap_snr,
  	       pgdat_snr, nid);
  	printk(KERN_CONT
  	       " have a circular dependency on usemap and pgdat allocations\n");
  }
  #else
  /*
   * Variant used without CONFIG_MEMORY_HOTREMOVE: no placement attempt is
   * made, so NULL is returned and the caller uses its fallback allocation.
   */
  static unsigned long * __init
  sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
  {
  	return NULL;
  }
  
  /* Variant used without CONFIG_MEMORY_HOTREMOVE: nothing is checked. */
  static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
a322f8ab6   Sam Ravnborg   mm: fix section m...
330
  /*
   * Allocate the usemap for section @pnum at boot.
   *
   * The section that holds the node's pgdat is tried first; otherwise the
   * usemap comes from the node's bootmem and its placement is checked by
   * check_usemap_section_nr().  Returns NULL, after printing a warning,
   * when both attempts fail.
   */
  static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
  {
  	unsigned long *usemap;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);

  	usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
  	if (usemap)
  		return usemap;

  	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
  	if (usemap) {
  		check_usemap_section_nr(nid, usemap);
  		return usemap;
  	}

  	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
  	nid = 0;

  	printk(KERN_WARNING "%s: allocation failed\n", __func__);
  	return NULL;
  }
8f6aac419   Christoph Lameter   Generic Virtual M...
349
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
98f3cfc1d   Yasunori Goto   memory hotplug: H...
350
  /*
   * Allocate the mem_map of one section on node @nid: alloc_remap() is
   * tried first, then page-aligned bootmem of the node.  May return NULL.
   */
  struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
  {
  	struct page *map;

  	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
  	if (!map)
  		map = alloc_bootmem_pages_node(NODE_DATA(nid),
  			       PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));

  	return map;
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
9e5c6da71   Adrian Bunk   make mm/sparse.c:...
362
  /*
   * Allocate the mem_map of section @pnum on the node recorded for it
   * during early boot.
   *
   * On failure an error is printed, the section's section_mem_map is
   * cleared and NULL is returned.
   */
  static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
  {
  	struct page *map;
  	struct mem_section *ms = __nr_to_section(pnum);
  	int nid = sparse_early_nid(ms);

  	map = sparse_mem_map_populate(pnum, nid);
  	if (map)
  		return map;

  	printk(KERN_ERR "%s: sparsemem memory map backing failed "
  			"some memory will not be available.\n", __func__);
  	ms->section_mem_map = 0;
  	return NULL;
  }
c2b91e2ee   Yinghai Lu   x86_64/mm: check ...
376
377
378
  /*
   * Weak default that does nothing; sparse_init() calls it once after all
   * sections have been set up.
   * NOTE(review): presumably overridden by architectures that have something
   * to print for their vmemmap - confirm.
   */
  void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
  {
  }
193faea92   Stephen Rothwell   Move three functi...
379
380
381
382
383
384
385
386
  /*
   * Allocate the accumulated non-linear sections, allocate a mem_map
   * for each and record the physical to section mapping.
   */
  /*
   * Set up every present section: allocate all usemaps first, then give
   * each section that got a usemap a mem_map and initialise it.  Sections
   * whose usemap or mem_map allocation failed are skipped.  Panics when the
   * temporary usemap pointer table cannot be allocated.
   */
  void __init sparse_init(void)
  {
  	unsigned long pnum;
  	struct page *map;
  	unsigned long *usemap;
  	unsigned long **usemap_map;
  	int size;

  	/*
  	 * map is using big page (aka 2M in x86 64 bit)
  	 * usemap is less one page (aka 24 bytes)
  	 * so alloc 2M (with 2M align) and 24 bytes in turn will
  	 * make next 2M slip to one more 2M later.
  	 * then in big system, the memory will have a lot of holes...
  	 * here try to allocate 2M pages continuously.
  	 *
  	 * powerpc need to call sparse_init_one_section right after each
  	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
  	 */
  	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
  	usemap_map = alloc_bootmem(size);
  	if (!usemap_map)
  		panic("can not allocate usemap_map\n");

  	/* pass 1: usemaps only */
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;
  		usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
  	}

  	/* pass 2: mem_maps, then tie both to the section */
  	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
  		if (!present_section_nr(pnum))
  			continue;

  		usemap = usemap_map[pnum];
  		if (!usemap)
  			continue;

  		map = sparse_early_mem_map_alloc(pnum);
  		if (!map)
  			continue;

  		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
  								usemap);
  	}

  	vmemmap_populate_print_last();

  	/* the pointer table was only needed between the two passes */
  	free_bootmem(__pa(usemap_map), size);
  }
  
  #ifdef CONFIG_MEMORY_HOTPLUG
98f3cfc1d   Yasunori Goto   memory hotplug: H...
433
434
435
436
437
438
439
440
441
442
443
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  /*
   * CONFIG_SPARSEMEM_VMEMMAP variant: the memmap for section @pnum comes
   * from sparse_mem_map_populate(); @nr_pages is not used.
   */
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						 unsigned long nr_pages)
  {
  	/* This will make the necessary allocations eventually. */
  	return sparse_mem_map_populate(pnum, nid);
  }
  /*
   * CONFIG_SPARSEMEM_VMEMMAP variant: freeing is not implemented, so the
   * memmap is left in place.
   */
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	return; /* XXX: Not implemented yet */
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
444
445
446
  /* CONFIG_SPARSEMEM_VMEMMAP variant: nothing is released. */
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
447
  #else
0b0acbec1   Dave Hansen   [PATCH] memory ho...
448
449
450
451
  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
  {
  	struct page *page, *ret;
  	unsigned long memmap_size = sizeof(struct page) * nr_pages;
f2d0aa5bf   Yasunori Goto   [PATCH] memory ho...
452
  	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
0b0acbec1   Dave Hansen   [PATCH] memory ho...
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
  	if (page)
  		goto got_map_page;
  
  	ret = vmalloc(memmap_size);
  	if (ret)
  		goto got_map_ptr;
  
  	return NULL;
  got_map_page:
  	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
  got_map_ptr:
  	memset(ret, 0, memmap_size);
  
  	return ret;
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
468
469
470
471
472
  /*
   * Non-VMEMMAP variant: @pnum and @nid are not used, the memmap comes from
   * __kmalloc_section_memmap().
   */
  static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
  						  unsigned long nr_pages)
  {
  	return __kmalloc_section_memmap(nr_pages);
  }
0b0acbec1   Dave Hansen   [PATCH] memory ho...
473
474
  /*
   * Release a memmap obtained from __kmalloc_section_memmap(), picking the
   * release function by whether @memmap is a vmalloc address.
   */
  static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
  {
  	if (is_vmalloc_addr(memmap)) {
  		vfree(memmap);
  		return;
  	}

  	free_pages((unsigned long)memmap,
  		   get_order(sizeof(struct page) * nr_pages));
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
  
  /*
   * Drop the bootmem reference on each of the @nr_pages pages starting at
   * @page, skipping pages that lie in the section being removed.
   */
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
  	unsigned long idx;

  	for (idx = 0; idx < nr_pages; idx++, page++) {
  		unsigned long maps_section_nr, removing_section_nr;
  		int magic = atomic_read(&page->_mapcount);

  		BUG_ON(magic == NODE_INFO);

  		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
  		removing_section_nr = page->private;

  		/*
  		 * At this point the section being removed is logically
  		 * offline, i.e. all of its pages are isolated from the
  		 * page allocator.  A memmap page that lives in that very
  		 * section must stay isolated: once freed, the allocator
  		 * could hand out memory that is about to be removed
  		 * physically.
  		 */
  		if (maps_section_nr == removing_section_nr)
  			continue;

  		put_page_bootmem(page);
  	}
  }
98f3cfc1d   Yasunori Goto   memory hotplug: H...
507
  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
0b0acbec1   Dave Hansen   [PATCH] memory ho...
508

ea01ea937   Badari Pulavarty   hotplug memory re...
509
510
  static void free_section_usemap(struct page *memmap, unsigned long *usemap)
  {
0c0a4a517   Yasunori Goto   memory hotplug: f...
511
512
  	struct page *usemap_page;
  	unsigned long nr_pages;
ea01ea937   Badari Pulavarty   hotplug memory re...
513
514
  	if (!usemap)
  		return;
0c0a4a517   Yasunori Goto   memory hotplug: f...
515
  	usemap_page = virt_to_page(usemap);
ea01ea937   Badari Pulavarty   hotplug memory re...
516
517
518
  	/*
  	 * Check to see if allocation came from hot-plug-add
  	 */
0c0a4a517   Yasunori Goto   memory hotplug: f...
519
  	if (PageSlab(usemap_page)) {
ea01ea937   Badari Pulavarty   hotplug memory re...
520
521
522
523
524
525
526
  		kfree(usemap);
  		if (memmap)
  			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
  		return;
  	}
  
  	/*
0c0a4a517   Yasunori Goto   memory hotplug: f...
527
528
  	 * The usemap came from bootmem. This is packed with other usemaps
  	 * on the section which has pgdat at boot time. Just keep it as is now.
ea01ea937   Badari Pulavarty   hotplug memory re...
529
  	 */
0c0a4a517   Yasunori Goto   memory hotplug: f...
530
531
532
533
534
535
536
537
538
539
  
  	if (memmap) {
  		struct page *memmap_page;
  		memmap_page = virt_to_page(memmap);
  
  		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
  			>> PAGE_SHIFT;
  
  		free_map_bootmem(memmap_page, nr_pages);
  	}
ea01ea937   Badari Pulavarty   hotplug memory re...
540
  }
29751f699   Andy Whitcroft   [PATCH] sparsemem...
541
  /*
29751f699   Andy Whitcroft   [PATCH] sparsemem...
542
543
544
545
   * returns the number of sections whose mem_maps were properly
   * set.  If this is <=0, then that means that the passed-in
   * map was not consumed and must be freed.
   */
31168481c   Al Viro   meminit section w...
546
  /*
   * Hot-add the section that starts at @start_pfn to @zone's node.
   *
   * Returns a positive value on success, -EEXIST when the section is
   * already marked present and another negative errno when an allocation
   * or the section initialisation fails.  The memmap and usemap allocated
   * here are freed again whenever the result is <= 0.
   */
  int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
  			   int nr_pages)
  {
  	unsigned long section_nr = pfn_to_section_nr(start_pfn);
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nid = pgdat->node_id;
  	struct mem_section *ms;
  	struct page *memmap;
  	unsigned long *usemap;
  	unsigned long flags;
  	int ret;

  	/*
  	 * no locking for this, because it does its own
  	 * plus, it does a kmalloc
  	 */
  	ret = sparse_index_init(section_nr, nid);
  	if (ret < 0 && ret != -EEXIST)
  		return ret;

  	memmap = kmalloc_section_memmap(section_nr, nid, nr_pages);
  	if (!memmap)
  		return -ENOMEM;

  	usemap = __kmalloc_section_usemap();
  	if (!usemap) {
  		__kfree_section_memmap(memmap, nr_pages);
  		return -ENOMEM;
  	}

  	pgdat_resize_lock(pgdat, &flags);

  	ms = __pfn_to_section(start_pfn);
  	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
  		ret = -EEXIST;
  	} else {
  		ms->section_mem_map |= SECTION_MARKED_PRESENT;
  		ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
  	}

  	pgdat_resize_unlock(pgdat, &flags);

  	/* the section did not take ownership of the two allocations */
  	if (ret <= 0) {
  		kfree(usemap);
  		__kfree_section_memmap(memmap, nr_pages);
  	}
  	return ret;
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
  
  /*
   * Detach the memmap and usemap from @ms and release them.  A section
   * whose section_mem_map is zero has nothing attached.
   */
  void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
  {
  	struct page *memmap;
  	unsigned long *usemap;

  	if (!ms->section_mem_map) {
  		free_section_usemap(NULL, NULL);
  		return;
  	}

  	usemap = ms->pageblock_flags;
  	memmap = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms));

  	/* clear the section before the backing memory goes away */
  	ms->section_mem_map = 0;
  	ms->pageblock_flags = NULL;

  	free_section_usemap(memmap, usemap);
  }
a3142c8e1   Yasunori Goto   Fix section misma...
608
  #endif