Blame view

mm/memory_hotplug.c 20.8 KB
3947be196   Dave Hansen   [PATCH] memory ho...
1
2
3
4
5
  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
3947be196   Dave Hansen   [PATCH] memory ho...
6
7
8
9
10
11
12
13
14
  #include <linux/stddef.h>
  #include <linux/mm.h>
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
  #include <linux/bootmem.h>
  #include <linux/compiler.h>
  #include <linux/module.h>
  #include <linux/pagevec.h>
2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
15
  #include <linux/writeback.h>
3947be196   Dave Hansen   [PATCH] memory ho...
16
17
18
19
20
21
22
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
23
  #include <linux/ioport.h>
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
24
25
26
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
71088785c   Badari Pulavarty   mm: cleanup to ma...
27
  #include <linux/pfn.h>
3947be196   Dave Hansen   [PATCH] memory ho...
28
29
  
  #include <asm/tlbflush.h>
1e5ad9a3b   Adrian Bunk   mm/memory_hotplug...
30
  #include "internal.h"
45e0b78b0   Keith Mannthey   [PATCH] hot-add-m...
31
32
33
34
35
36
37
38
39
40
  /*
   * Add the range [start, start + size) to the iomem resource tree as a
   * busy "System RAM" region.
   *
   * Returns the newly allocated resource on success.  Returns NULL when the
   * descriptor cannot be allocated or when request_resource() reports a
   * negative value for the range; in the latter case a message is printed
   * and the descriptor is freed here.
   *
   * An allocation failure is reported to the caller instead of crashing the
   * machine: callers already have to cope with a NULL return.
   */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res;

  	res = kzalloc(sizeof(*res), GFP_KERNEL);
  	if (!res)
  		return NULL;

  	res->name = "System RAM";
  	res->start = start;
  	/* resource ranges are inclusive */
  	res->end = start + size - 1;
  	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
  	if (request_resource(&iomem_resource, res) < 0) {
  		printk("System RAM resource %llx - %llx cannot be added\n",
  		(unsigned long long)res->start, (unsigned long long)res->end);
  		kfree(res);
  		res = NULL;
  	}
  	return res;
  }
  
  /*
   * Release a resource from the resource tree and free its descriptor.
   * A NULL @res is silently ignored.
   */
  static void release_memory_resource(struct resource *res)
  {
  	if (res) {
  		release_resource(res);
  		kfree(res);
  	}
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
60
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
047532787   Yasunori Goto   memory hotplug: r...
61
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
af370fb8c   Yasunori Goto   memory hotplug: s...
62
  static void get_page_bootmem(unsigned long info,  struct page *page, int type)
047532787   Yasunori Goto   memory hotplug: r...
63
  {
af370fb8c   Yasunori Goto   memory hotplug: s...
64
  	atomic_set(&page->_mapcount, type);
047532787   Yasunori Goto   memory hotplug: r...
65
66
67
68
69
70
71
  	SetPagePrivate(page);
  	set_page_private(page, info);
  	atomic_inc(&page->_count);
  }
  
  void put_page_bootmem(struct page *page)
  {
af370fb8c   Yasunori Goto   memory hotplug: s...
72
  	int type;
047532787   Yasunori Goto   memory hotplug: r...
73

af370fb8c   Yasunori Goto   memory hotplug: s...
74
75
  	type = atomic_read(&page->_mapcount);
  	BUG_ON(type >= -1);
047532787   Yasunori Goto   memory hotplug: r...
76
77
78
79
80
81
82
83
84
  
  	if (atomic_dec_return(&page->_count) == 1) {
  		ClearPagePrivate(page);
  		set_page_private(page, 0);
  		reset_page_mapcount(page);
  		__free_pages_bootmem(page, 0);
  	}
  
  }
d92bc3185   Adrian Bunk   mm: make register...
85
  static void register_page_bootmem_info_section(unsigned long start_pfn)
047532787   Yasunori Goto   memory hotplug: r...
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
  
  	if (!pfn_valid(start_pfn))
  		return;
  
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	/* Get section's memmap address */
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	/*
  	 * Get page for the memmap's phys address
  	 * XXX: need more consideration for sparse_vmemmap...
  	 */
  	page = virt_to_page(memmap);
  	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
  	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
  
  	/* remember memmap's page */
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, SECTION_INFO);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
af370fb8c   Yasunori Goto   memory hotplug: s...
118
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
047532787   Yasunori Goto   memory hotplug: r...
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  
  }
  
  /*
   * Tag, via get_page_bootmem(), the pages that hold a node's metadata:
   * the pglist_data itself, each zone's wait table, and (through
   * register_page_bootmem_info_section()) the per-section data of every
   * section in the node's spanned pfn range.
   */
  void register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	unsigned long i, pfn, end_pfn, nr_pages;
  	int node = pgdat->node_id;
  	struct page *page;
  	struct zone *zone;
  
  	/* pages occupied by the pglist_data structure */
  	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	page = virt_to_page(pgdat);
  
  	for (i = 0; i < nr_pages; i++, page++)
  		get_page_bootmem(node, page, NODE_INFO);
  
  	zone = &pgdat->node_zones[0];
  	/*
  	 * NOTE(review): the bound stops one short of MAX_NR_ZONES, so the
  	 * last zone's wait table is not visited - confirm this is intended.
  	 */
  	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
  		if (zone->wait_table) {
  			/* pages occupied by this zone's wait table */
  			nr_pages = zone->wait_table_hash_nr_entries
  				* sizeof(wait_queue_head_t);
  			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
  			page = virt_to_page(zone->wait_table);
  
  			for (i = 0; i < nr_pages; i++, page++)
  				get_page_bootmem(node, page, NODE_INFO);
  		}
  	}
  
  	pfn = pgdat->node_start_pfn;
  	end_pfn = pfn + pgdat->node_spanned_pages;
  
  	/* register per-section info, one section at a time */
  	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
  		register_page_bootmem_info_section(pfn);
  
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
76cdd58e5   Heiko Carstens   memory_hotplug: a...
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
  /*
   * Widen @zone's span so that it also covers [start_pfn, end_pfn).
   * The span is only ever grown, never shrunk.  The update is done under
   * the zone span write lock.
   */
  static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
  			   unsigned long end_pfn)
  {
  	unsigned long new_end;

  	zone_span_writelock(zone);

  	/* compute the new end before the start is possibly moved down */
  	new_end = max(zone->zone_start_pfn + zone->spanned_pages, end_pfn);

  	if (zone->zone_start_pfn > start_pfn)
  		zone->zone_start_pfn = start_pfn;

  	zone->spanned_pages = new_end - zone->zone_start_pfn;

  	zone_span_writeunlock(zone);
  }
  
  /*
   * Widen the node's span so that it also covers [start_pfn, end_pfn).
   * The span is only ever grown, never shrunk.
   */
  static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
  			    unsigned long end_pfn)
  {
  	unsigned long new_end;

  	/* compute the new end before the start is possibly moved down */
  	new_end = max(pgdat->node_start_pfn + pgdat->node_spanned_pages,
  		      end_pfn);

  	if (pgdat->node_start_pfn > start_pfn)
  		pgdat->node_start_pfn = start_pfn;

  	pgdat->node_spanned_pages = new_end - pgdat->node_start_pfn;
  }
31168481c   Al Viro   meminit section w...
186
  /*
   * Make @zone (and its node) span one new section starting at
   * @phys_start_pfn and initialise the section's memmap.
   *
   * A zone without a wait table is first set up through
   * init_currently_empty_zone(); its error, if any, is returned.
   * Returns 0 on success.
   */
  static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int section_pages = PAGES_PER_SECTION;
  	int nid = pgdat->node_id;
  	/* index of @zone inside its node's zone array */
  	int zone_idx = zone - pgdat->node_zones;
  	unsigned long flags;

  	if (!zone->wait_table) {
  		int err = init_currently_empty_zone(zone, phys_start_pfn,
  						    section_pages,
  						    MEMMAP_HOTPLUG);
  		if (err)
  			return err;
  	}

  	/* grow both spans under the node resize lock */
  	pgdat_resize_lock(pgdat, &flags);
  	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + section_pages);
  	grow_pgdat_span(pgdat, phys_start_pfn,
  			phys_start_pfn + section_pages);
  	pgdat_resize_unlock(pgdat, &flags);

  	memmap_init_zone(section_pages, nid, zone_idx,
  			 phys_start_pfn, MEMMAP_HOTPLUG);
  	return 0;
  }
c04fc586c   Gary Hade   mm: show node to ...
212
213
  /*
   * Add one section starting at @phys_start_pfn to @zone and register it
   * for node @nid.
   *
   * Returns -EEXIST if the start pfn is already valid, a negative value
   * from sparse_add_one_section() or __add_zone() on failure, otherwise
   * the result of register_new_memory().
   */
  static int __meminit __add_section(int nid, struct zone *zone,
  					unsigned long phys_start_pfn)
  {
  	int err;

  	if (pfn_valid(phys_start_pfn))
  		return -EEXIST;

  	err = sparse_add_one_section(zone, phys_start_pfn, PAGES_PER_SECTION);
  	if (err < 0)
  		return err;

  	err = __add_zone(zone, phys_start_pfn);
  	if (err < 0)
  		return err;

  	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
229
230
231
232
233
234
235
236
237
238
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  /*
   * Stub used when CONFIG_SPARSEMEM_VMEMMAP is set: section removal is
   * always refused with -EBUSY.
   */
  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
  	/*
  	 * XXX: Freeing memmap with vmemmap is not implemented yet.
  	 *      This should be removed later.
  	 */
  	return -EBUSY;
  }
  #else
ea01ea937   Badari Pulavarty   hotplug memory re...
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
  /*
   * Remove one section from @zone.
   *
   * Returns -EINVAL if @ms is not a valid section, the error from
   * unregister_memory_section() if that fails, and 0 once the section
   * has been removed under the node resize lock.
   */
  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long flags;
  	int err;

  	if (!valid_section(ms))
  		return -EINVAL;

  	err = unregister_memory_section(ms);
  	if (err)
  		return err;

  	pgdat_resize_lock(pgdat, &flags);
  	sparse_remove_one_section(zone, ms);
  	pgdat_resize_unlock(pgdat, &flags);

  	return 0;
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
257
  #endif
ea01ea937   Badari Pulavarty   hotplug memory re...
258

3947be196   Dave Hansen   [PATCH] memory ho...
259
260
261
262
263
264
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
c04fc586c   Gary Hade   mm: show node to ...
265
266
  int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
  			unsigned long nr_pages)
3947be196   Dave Hansen   [PATCH] memory ho...
267
268
269
  {
  	unsigned long i;
  	int err = 0;
6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
270
271
272
273
  	int start_sec, end_sec;
  	/* during initialize mem_map, align hot-added range to section */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
3947be196   Dave Hansen   [PATCH] memory ho...
274

6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
275
  	for (i = start_sec; i <= end_sec; i++) {
c04fc586c   Gary Hade   mm: show node to ...
276
  		err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
3947be196   Dave Hansen   [PATCH] memory ho...
277

6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
278
  		/*
183ff22bb   Simon Arlott   spelling fixes: mm/
279
  		 * EEXIST is finally dealt with by ioresource collision
6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
280
281
  		 * check. see add_memory() => register_memory_resource()
  		 * Warning will be printed if there is collision.
bed120c64   Joel H Schopp   [PATCH] spufs: fi...
282
283
  		 */
  		if (err && (err != -EEXIST))
3947be196   Dave Hansen   [PATCH] memory ho...
284
  			break;
6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
285
  		err = 0;
3947be196   Dave Hansen   [PATCH] memory ho...
286
287
288
289
  	}
  
  	return err;
  }
bed120c64   Joel H Schopp   [PATCH] spufs: fi...
290
  EXPORT_SYMBOL_GPL(__add_pages);
3947be196   Dave Hansen   [PATCH] memory ho...
291

ea01ea937   Badari Pulavarty   hotplug memory re...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
  /**
   * __remove_pages() - remove sections of pages from a zone
   * @zone: zone from which pages need to be removed
   * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
   * @nr_pages: number of pages to remove (must be multiple of section size)
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
   * sure that pages are marked reserved and zones are adjust properly by
   * calling offline_pages().
   */
  int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
  	unsigned long i, ret = 0;
  	int sections_to_remove;
  
  	/*
  	 * We can only remove entire sections
  	 */
  	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
  	BUG_ON(nr_pages % PAGES_PER_SECTION);
ea01ea937   Badari Pulavarty   hotplug memory re...
314
315
316
  	sections_to_remove = nr_pages / PAGES_PER_SECTION;
  	for (i = 0; i < sections_to_remove; i++) {
  		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
de7f0cba9   Nathan Fontenot   memory hotplug: r...
317
318
  		release_mem_region(pfn << PAGE_SHIFT,
  				   PAGES_PER_SECTION << PAGE_SHIFT);
ea01ea937   Badari Pulavarty   hotplug memory re...
319
320
321
322
323
324
325
  		ret = __remove_section(zone, __pfn_to_section(pfn));
  		if (ret)
  			break;
  	}
  	return ret;
  }
  EXPORT_SYMBOL_GPL(__remove_pages);
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  /*
   * Bring a single page online: account it in the global page counters,
   * clear its reserved flag, initialise its reference count and free it.
   */
  void online_page(struct page *page)
  {
  	totalram_pages++;
  	num_physpages++;
  
  #ifdef CONFIG_HIGHMEM
  	/* highmem pages are additionally counted in totalhigh_pages */
  	if (PageHighMem(page))
  		totalhigh_pages++;
  #endif
  
  #ifdef CONFIG_FLATMEM
  	/* keep max_mapnr at least as large as this page's pfn */
  	max_mapnr = max(page_to_pfn(page), max_mapnr);
  #endif
  
  	ClearPageReserved(page);
  	init_page_count(page);
  	__free_page(page);
  }
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
344
345
  /*
   * walk_memory_resource() callback: online every page of
   * [start_pfn, start_pfn + nr_pages) if the first page of the range is
   * reserved, and add the number of onlined pages to the counter that
   * @arg points to (an unsigned long).  Always returns 0.
   */
  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
  {
  	unsigned long *onlined = arg;
  	unsigned long off;

  	/* only the first page of the range is inspected */
  	if (!PageReserved(pfn_to_page(start_pfn)))
  		return 0;

  	for (off = 0; off < nr_pages; off++)
  		online_page(pfn_to_page(start_pfn + off));

  	*onlined += nr_pages;
  	return 0;
  }
  
  
  int online_pages(unsigned long pfn, unsigned long nr_pages)
  {
3947be196   Dave Hansen   [PATCH] memory ho...
363
364
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
365
  	int need_zonelists_rebuild = 0;
7b78d335a   Yasunori Goto   memory hotplug: r...
366
367
368
369
370
371
372
373
374
375
376
  	int nid;
  	int ret;
  	struct memory_notify arg;
  
  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
  	arg.status_change_nid = -1;
  
  	nid = page_to_nid(pfn_to_page(pfn));
  	if (node_present_pages(nid) == 0)
  		arg.status_change_nid = nid;
3947be196   Dave Hansen   [PATCH] memory ho...
377

7b78d335a   Yasunori Goto   memory hotplug: r...
378
379
380
381
382
383
  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret) {
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
384
385
386
  	/*
  	 * This doesn't need a lock to do pfn_to_page().
  	 * The section can't be removed here because of the
da19cbcf7   Daniel Walker   driver core: memo...
387
  	 * memory_block->state_mutex.
3947be196   Dave Hansen   [PATCH] memory ho...
388
389
  	 */
  	zone = page_zone(pfn_to_page(pfn));
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
390
391
392
393
394
395
396
  	/*
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
  	if (!populated_zone(zone))
  		need_zonelists_rebuild = 1;
fd8a4221a   Geoff Levand   memory_hotplug: c...
397
  	ret = walk_memory_resource(pfn, nr_pages, &onlined_pages,
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
398
  		online_pages_range);
fd8a4221a   Geoff Levand   memory_hotplug: c...
399
400
401
402
403
404
405
  	if (ret) {
  		printk(KERN_DEBUG "online_pages %lx at %lx failed
  ",
  			nr_pages, pfn);
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
406
  	zone->present_pages += onlined_pages;
f2937be58   Yasunori Goto   [PATCH] memory ho...
407
  	zone->zone_pgdat->node_present_pages += onlined_pages;
3947be196   Dave Hansen   [PATCH] memory ho...
408

61b13993a   Dave Hansen   [PATCH] memory ho...
409
  	setup_per_zone_pages_min();
7ea1530ab   Christoph Lameter   Memoryless nodes:...
410
411
412
413
  	if (onlined_pages) {
  		kswapd_run(zone_to_nid(zone));
  		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
  	}
61b13993a   Dave Hansen   [PATCH] memory ho...
414

6811378e7   Yasunori Goto   [PATCH] wait_tabl...
415
416
  	if (need_zonelists_rebuild)
  		build_all_zonelists();
2f7f24eca   Kent Liu   memory-hotplug: d...
417
418
  	else
  		vm_total_pages = nr_free_pagecache_pages();
2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
419
  	writeback_set_ratelimit();
7b78d335a   Yasunori Goto   memory hotplug: r...
420
421
422
  
  	if (onlined_pages)
  		memory_notify(MEM_ONLINE, &arg);
3947be196   Dave Hansen   [PATCH] memory ho...
423
424
  	return 0;
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
425
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
426

9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
  /*
   * Allocate and initialise the pgdat of a node that is being hot-added.
   *
   * @nid:   node id
   * @start: physical start address; its pfn is passed to
   *         free_area_init_node()
   *
   * Returns the new pgdat, or NULL if arch_alloc_nodedata() fails.
   */
  static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
  {
  	unsigned long zone_sizes[MAX_NR_ZONES] = {0};
  	unsigned long hole_sizes[MAX_NR_ZONES] = {0};
  	unsigned long node_start_pfn = start >> PAGE_SHIFT;
  	struct pglist_data *new_pgdat = arch_alloc_nodedata(nid);

  	if (!new_pgdat)
  		return NULL;

  	arch_refresh_nodedata(nid, new_pgdat);

  	/* we can use NODE_DATA(nid) from here */

  	/* init node's zones as empty zones, we don't have any present pages.*/
  	free_area_init_node(nid, zone_sizes, node_start_pfn, hole_sizes);

  	return new_pgdat;
  }
  
  /*
   * Undo hotadd_new_pgdat(): reset the node data of @nid to NULL and free
   * @pgdat.
   */
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
  	arch_free_nodedata(pgdat);
  }
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
454

31168481c   Al Viro   meminit section w...
455
456
  /*
   * Hot-add the physical range [start, start + size) to node @nid.
   *
   * The range is first registered as a memory resource; a pgdat is
   * allocated if the node is not online yet; then the architecture hook
   * arch_add_memory() is called.  Any failure before the node is marked
   * online rolls back the pgdat allocation and releases the resource.
   *
   * Returns 0 on success, -EEXIST if the resource cannot be registered,
   * -ENOMEM if the pgdat cannot be allocated, or the negative error from
   * arch_add_memory().
   *
   * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
   */
  int __ref add_memory(int nid, u64 start, u64 size)
  {
  	pg_data_t *pgdat = NULL;
  	int new_pgdat = 0;
  	struct resource *res;
  	int ret;

  	res = register_memory_resource(start, size);
  	if (!res)
  		return -EEXIST;

  	if (!node_online(nid)) {
  		pgdat = hotadd_new_pgdat(nid, start);
  		/* go through the error path so that @res is released */
  		ret = -ENOMEM;
  		if (!pgdat)
  			goto error;
  		new_pgdat = 1;
  	}

  	/* call arch's memory hotadd */
  	ret = arch_add_memory(nid, start, size);
  	if (ret < 0)
  		goto error;

  	/* we online node here. we can't roll back from here. */
  	node_set_online(nid);

  	if (new_pgdat) {
  		ret = register_one_node(nid);
  		/*
  		 * If sysfs file of new node can't create, cpu on the node
  		 * can't be hot-added. There is no rollback way now.
  		 * So, check by BUG_ON() to catch it reluctantly..
  		 */
  		BUG_ON(ret);
  	}

  	return ret;

  error:
  	/* rollback pgdat allocation and others */
  	if (new_pgdat)
  		rollback_node_hotadd(nid, pgdat);
  	release_memory_resource(res);

  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
497
498
499
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
   * set and the size of the free page is given by page_order(). Using this,
   * the function determines if the pageblock contains only free pages.
   * Due to buddy constraints, a free page at least the size of a pageblock will
   * be located at the start of the pageblock
   */
  static inline int pageblock_free(struct page *page)
  {
  	/* only a page on the buddy free lists has a meaningful order */
  	if (!PageBuddy(page))
  		return 0;

  	return page_order(page) >= pageblock_order;
  }
  
  /* Return the start of the next active pageblock after a given page */
  static struct page *next_active_pageblock(struct page *page)
  {
  	int pageblocks_stride;
  
  	/* Ensure the starting page is pageblock-aligned */
  	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
  
  	/* Move forward by at least 1 * pageblock_nr_pages */
  	pageblocks_stride = 1;
  
  	/* If the entire pageblock is free, move to the end of free page */
  	if (pageblock_free(page))
  		pageblocks_stride += page_order(page) - pageblock_order;
  
  	return page + (pageblocks_stride * pageblock_nr_pages);
  }
  
  /*
   * Checks if this range of memory is likely to be hot-removable.
   *
   * Only the first page of each pageblock in
   * [start_pfn, start_pfn + nr_pages) is examined.  Returns 1 if every
   * examined pageblock looks removable, 0 otherwise.
   */
  int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
  {
  	struct page *cur = pfn_to_page(start_pfn);
  	struct page *end = cur + nr_pages;
  	int mt;

  	while (cur < end) {
  		mt = get_pageblock_migratetype(cur);

  		/*
  		 * A pageblock containing MOVABLE or free pages is considered
  		 * removable
  		 */
  		if (mt != MIGRATE_MOVABLE && !pageblock_free(cur))
  			return 0;

  		/*
  		 * A pageblock starting with a PageReserved page is not
  		 * considered removable.
  		 */
  		if (PageReserved(cur))
  			return 0;

  		cur = next_active_pageblock(cur);
  	}

  	/* All pageblocks in the memory block are likely to be hot-removable */
  	return 1;
  }
  
  /*
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
   * Confirm that all pages in the range [start, end) belong to the same zone.
   */
  static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
  {
  	struct zone *seen = NULL;
  	unsigned long base;
  	int off;

  	/* sample one page per MAX_ORDER_NR_PAGES-sized chunk */
  	for (base = start_pfn; base < end_pfn; base += MAX_ORDER_NR_PAGES) {
  		struct page *pg;

  		/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  		for (off = 0; off < MAX_ORDER_NR_PAGES; off++)
  			if (pfn_valid_within(base + off))
  				break;

  		/* the whole chunk is a hole: nothing to compare */
  		if (off == MAX_ORDER_NR_PAGES)
  			continue;

  		pg = pfn_to_page(base + off);
  		if (seen && page_zone(pg) != seen)
  			return 0;
  		seen = page_zone(pg);
  	}
  	return 1;
  }
  
  /*
   * Scanning pfn is much easier than scanning lru list.
   * Scan pfn from start to end and find an LRU page.
   *
   * Returns the pfn of the first valid page in [start, end) that has
   * PageLRU set, or 0 if there is none.
   *
   * NOTE(review): the pfn is returned through an int, so a pfn that does
   * not fit in an int would be truncated - confirm whether the return
   * type should be unsigned long.
   */
  int scan_lru_pages(unsigned long start, unsigned long end)
  {
  	unsigned long cur;

  	for (cur = start; cur < end; cur++) {
  		if (!pfn_valid(cur))
  			continue;
  		if (PageLRU(pfn_to_page(cur)))
  			return cur;
  	}
  	return 0;
  }
  
  static struct page *
3c1d43787   Hugh Dickins   mm: remove GFP_HI...
604
  hotremove_migrate_alloc(struct page *page, unsigned long private, int **x)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
605
  {
3c1d43787   Hugh Dickins   mm: remove GFP_HI...
606
607
  	/* This should be improooooved!! */
  	return alloc_page(GFP_HIGHUSER_MOVABLE);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
608
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
  #define NR_OFFLINE_AT_ONCE_PAGES	(256)
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
  	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
  	int not_managed = 0;
  	int ret = 0;
  	LIST_HEAD(source);
  
  	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
  		if (!page_count(page))
  			continue;
  		/*
  		 * We can skip free pages. And we can only deal with pages on
  		 * LRU.
  		 */
62695a84e   Nick Piggin   vmscan: move isol...
630
  		ret = isolate_lru_page(page);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
631
  		if (!ret) { /* Success */
62695a84e   Nick Piggin   vmscan: move isol...
632
  			list_add_tail(&page->lru, &source);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
  			move_pages--;
  		} else {
  			/* Becasue we don't have big zone->lock. we should
  			   check this again here. */
  			if (page_count(page))
  				not_managed++;
  #ifdef CONFIG_DEBUG_VM
  			printk(KERN_INFO "removing from LRU failed"
  					 " %lx/%d/%lx
  ",
  				pfn, page_count(page), page->flags);
  #endif
  		}
  	}
  	ret = -EBUSY;
  	if (not_managed) {
  		if (!list_empty(&source))
  			putback_lru_pages(&source);
  		goto out;
  	}
  	ret = 0;
  	if (list_empty(&source))
  		goto out;
  	/* this function returns # of failed pages */
  	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);
  
  out:
  	return ret;
  }
  
  /*
   * walk_memory_resource() callback: remove the range from free_area[]
   * and mark all pages as Reserved.  @data is unused.  Always returns 0.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	unsigned long end = start + nr_pages;

  	__offline_isolated_pages(start, end);
  	return 0;
  }
  
  /*
   * Run offline_isolated_pages_cb() over the memory resources found in
   * [start_pfn, end_pfn).  The walker's return value is ignored.
   */
  static void
  offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long nr_pages = end_pfn - start_pfn;

  	walk_memory_resource(start_pfn, nr_pages, NULL,
  				offline_isolated_pages_cb);
  }
  
  /*
   * Check all pages in range, recorded as memory resource, are isolated.
   *
   * walk_memory_resource() callback.  @data points to a long; when
   * test_pages_isolated() returns 0 for the range, @nr_pages is added to
   * it.  Returns the result of test_pages_isolated().
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
  	long *total = data;
  	int ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);

  	if (!ret)
  		*total += (long)nr_pages;

  	return ret;
  }
  
  /*
   * Count the pages of [start_pfn, end_pfn) accepted by
   * check_pages_isolated_cb().  Returns that count, or the walker's
   * negative return value if it reports one.
   */
  static long
  check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
  {
  	long isolated = 0;
  	int err;

  	err = walk_memory_resource(start_pfn, end_pfn - start_pfn, &isolated,
  			check_pages_isolated_cb);

  	return (err < 0) ? (long)err : isolated;
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
709
710
711
712
713
  int offline_pages(unsigned long start_pfn,
  		  unsigned long end_pfn, unsigned long timeout)
  {
  	unsigned long pfn, nr_pages, expire;
  	long offlined_pages;
7b78d335a   Yasunori Goto   memory hotplug: r...
714
  	int ret, drain, retry_max, node;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
715
  	struct zone *zone;
7b78d335a   Yasunori Goto   memory hotplug: r...
716
  	struct memory_notify arg;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
717
718
719
720
721
722
723
724
725
726
727
  
  	BUG_ON(start_pfn >= end_pfn);
  	/* at least, alignment against pageblock is necessary */
  	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	/* This makes hotplug much easier...and readable.
  	   we assume this for now. .*/
  	if (!test_pages_in_a_zone(start_pfn, end_pfn))
  		return -EINVAL;
7b78d335a   Yasunori Goto   memory hotplug: r...
728
729
730
731
  
  	zone = page_zone(pfn_to_page(start_pfn));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
732
733
734
735
  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn);
  	if (ret)
  		return ret;
7b78d335a   Yasunori Goto   memory hotplug: r...
736
737
738
739
740
741
742
743
744
745
746
  
  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	arg.status_change_nid = -1;
  	if (nr_pages >= node_present_pages(node))
  		arg.status_change_nid = node;
  
  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_removal;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
  	pfn = start_pfn;
  	expire = jiffies + timeout;
  	drain = 0;
  	retry_max = 5;
  repeat:
  	/* start memory hot removal */
  	ret = -EAGAIN;
  	if (time_after(jiffies, expire))
  		goto failed_removal;
  	ret = -EINTR;
  	if (signal_pending(current))
  		goto failed_removal;
  	ret = 0;
  	if (drain) {
  		lru_add_drain_all();
  		flush_scheduled_work();
  		cond_resched();
9f8f21725   Christoph Lameter   Page allocator: c...
764
  		drain_all_pages();
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
  	}
  
  	pfn = scan_lru_pages(start_pfn, end_pfn);
  	if (pfn) { /* We have page on LRU */
  		ret = do_migrate_range(pfn, end_pfn);
  		if (!ret) {
  			drain = 1;
  			goto repeat;
  		} else {
  			if (ret < 0)
  				if (--retry_max == 0)
  					goto failed_removal;
  			yield();
  			drain = 1;
  			goto repeat;
  		}
  	}
  	/* drain all zone's lru pagevec, this is asyncronous... */
  	lru_add_drain_all();
  	flush_scheduled_work();
  	yield();
  	/* drain pcp pages , this is synchrouns. */
9f8f21725   Christoph Lameter   Page allocator: c...
787
  	drain_all_pages();
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
788
789
790
791
792
793
794
795
796
797
798
  	/* check again */
  	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
  	if (offlined_pages < 0) {
  		ret = -EBUSY;
  		goto failed_removal;
  	}
  	printk(KERN_INFO "Offlined Pages %ld
  ", offlined_pages);
  	/* Ok, all of our target is islaoted.
  	   We cannot do rollback at this point. */
  	offline_isolated_pages(start_pfn, end_pfn);
dbc0e4cef   KAMEZAWA Hiroyuki   memory hotremove:...
799
800
  	/* reset pagetype flags and makes migrate type to be MOVABLE */
  	undo_isolate_page_range(start_pfn, end_pfn);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
801
  	/* removal success */
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
802
803
804
805
  	zone->present_pages -= offlined_pages;
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	totalram_pages -= offlined_pages;
  	num_physpages -= offlined_pages;
7b78d335a   Yasunori Goto   memory hotplug: r...
806

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
807
808
  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();
7b78d335a   Yasunori Goto   memory hotplug: r...
809
810
  
  	memory_notify(MEM_OFFLINE, &arg);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
811
812
813
814
815
816
  	return 0;
  
  failed_removal:
  	printk(KERN_INFO "memory offlining %lx to %lx failed
  ",
  		start_pfn, end_pfn);
7b78d335a   Yasunori Goto   memory hotplug: r...
817
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
818
819
  	/* pushback to free area */
  	undo_isolate_page_range(start_pfn, end_pfn);
7b78d335a   Yasunori Goto   memory hotplug: r...
820

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
821
822
  	return ret;
  }
71088785c   Badari Pulavarty   mm: cleanup to ma...
823
824
825
826
827
828
829
830
831
  
  /*
   * Offline the memory range [start, start + size), given in bytes.
   * Both values are converted to page frame numbers with PFN_DOWN() and
   * passed to offline_pages() with a timeout of 120 * HZ jiffies; its
   * result is returned.
   */
  int remove_memory(u64 start, u64 size)
  {
  	unsigned long first_pfn = PFN_DOWN(start);
  	unsigned long limit_pfn = first_pfn + PFN_DOWN(size);

  	return offline_pages(first_pfn, limit_pfn, 120 * HZ);
  }
48e94196a   KAMEZAWA Hiroyuki   fix memory hot re...
832
833
834
835
836
  #else
  /*
   * Stub used when CONFIG_MEMORY_HOTREMOVE is not set: removal is always
   * refused with -EINVAL.
   */
  int remove_memory(u64 start, u64 size)
  {
  	return -EINVAL;
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
837
  #endif /* CONFIG_MEMORY_HOTREMOVE */
71088785c   Badari Pulavarty   mm: cleanup to ma...
838
  EXPORT_SYMBOL_GPL(remove_memory);