Blame view

mm/memory_hotplug.c 22.1 KB
3947be196   Dave Hansen   [PATCH] memory ho...
1
2
3
4
5
  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
3947be196   Dave Hansen   [PATCH] memory ho...
6
7
8
9
10
11
12
13
14
  #include <linux/stddef.h>
  #include <linux/mm.h>
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
  #include <linux/bootmem.h>
  #include <linux/compiler.h>
  #include <linux/module.h>
  #include <linux/pagevec.h>
2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
15
  #include <linux/writeback.h>
3947be196   Dave Hansen   [PATCH] memory ho...
16
17
18
19
20
21
22
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
23
  #include <linux/ioport.h>
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
24
25
26
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
71088785c   Badari Pulavarty   mm: cleanup to ma...
27
  #include <linux/pfn.h>
6ad696d2c   Andi Kleen   mm: allow memory ...
28
  #include <linux/suspend.h>
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
29
  #include <linux/mm_inline.h>
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
30
  #include <linux/firmware-map.h>
3947be196   Dave Hansen   [PATCH] memory ho...
31
32
  
  #include <asm/tlbflush.h>
1e5ad9a3b   Adrian Bunk   mm/memory_hotplug...
33
  #include "internal.h"
45e0b78b0   Keith Mannthey   [PATCH] hot-add-m...
34
35
36
37
38
39
40
41
42
43
  /* add this memory to iomem resource */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res;
  	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
  	BUG_ON(!res);
  
  	res->name = "System RAM";
  	res->start = start;
  	res->end = start + size - 1;
887c3cb18   Yasunori Goto   Add IORESOUCE_BUS...
44
  	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
45e0b78b0   Keith Mannthey   [PATCH] hot-add-m...
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
  	if (request_resource(&iomem_resource, res) < 0) {
  		printk("System RAM resource %llx - %llx cannot be added
  ",
  		(unsigned long long)res->start, (unsigned long long)res->end);
  		kfree(res);
  		res = NULL;
  	}
  	return res;
  }
  
  /*
   * Counterpart of register_memory_resource(): hand @res back with
   * release_resource() and kfree() it.  A NULL @res is accepted and
   * silently ignored.
   */
  static void release_memory_resource(struct resource *res)
  {
  	if (res) {
  		release_resource(res);
  		kfree(res);
  	}
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
63
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
047532787   Yasunori Goto   memory hotplug: r...
64
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
af370fb8c   Yasunori Goto   memory hotplug: s...
65
  static void get_page_bootmem(unsigned long info,  struct page *page, int type)
047532787   Yasunori Goto   memory hotplug: r...
66
  {
af370fb8c   Yasunori Goto   memory hotplug: s...
67
  	atomic_set(&page->_mapcount, type);
047532787   Yasunori Goto   memory hotplug: r...
68
69
70
71
  	SetPagePrivate(page);
  	set_page_private(page, info);
  	atomic_inc(&page->_count);
  }
23ce932a5   Rakib Mullick   mm: fix section m...
72
73
74
  /* reference to __meminit __free_pages_bootmem is valid
   * so use __ref to tell modpost not to generate a warning */
  void __ref put_page_bootmem(struct page *page)
047532787   Yasunori Goto   memory hotplug: r...
75
  {
af370fb8c   Yasunori Goto   memory hotplug: s...
76
  	int type;
047532787   Yasunori Goto   memory hotplug: r...
77

af370fb8c   Yasunori Goto   memory hotplug: s...
78
79
  	type = atomic_read(&page->_mapcount);
  	BUG_ON(type >= -1);
047532787   Yasunori Goto   memory hotplug: r...
80
81
82
83
84
85
86
87
88
  
  	if (atomic_dec_return(&page->_count) == 1) {
  		ClearPagePrivate(page);
  		set_page_private(page, 0);
  		reset_page_mapcount(page);
  		__free_pages_bootmem(page, 0);
  	}
  
  }
d92bc3185   Adrian Bunk   mm: make register...
89
  static void register_page_bootmem_info_section(unsigned long start_pfn)
047532787   Yasunori Goto   memory hotplug: r...
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
  
  	if (!pfn_valid(start_pfn))
  		return;
  
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	/* Get section's memmap address */
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	/*
  	 * Get page for the memmap's phys address
  	 * XXX: need more consideration for sparse_vmemmap...
  	 */
  	page = virt_to_page(memmap);
  	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
  	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
  
  	/* remember memmap's page */
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, SECTION_INFO);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
af370fb8c   Yasunori Goto   memory hotplug: s...
122
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
047532787   Yasunori Goto   memory hotplug: r...
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  
  }
  
  void register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	unsigned long i, pfn, end_pfn, nr_pages;
  	int node = pgdat->node_id;
  	struct page *page;
  	struct zone *zone;
  
  	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	page = virt_to_page(pgdat);
  
  	for (i = 0; i < nr_pages; i++, page++)
  		get_page_bootmem(node, page, NODE_INFO);
  
  	zone = &pgdat->node_zones[0];
  	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
  		if (zone->wait_table) {
  			nr_pages = zone->wait_table_hash_nr_entries
  				* sizeof(wait_queue_head_t);
  			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
  			page = virt_to_page(zone->wait_table);
  
  			for (i = 0; i < nr_pages; i++, page++)
  				get_page_bootmem(node, page, NODE_INFO);
  		}
  	}
  
  	pfn = pgdat->node_start_pfn;
  	end_pfn = pfn + pgdat->node_spanned_pages;
  
  	/* register_section info */
  	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
  		register_page_bootmem_info_section(pfn);
  
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
76cdd58e5   Heiko Carstens   memory_hotplug: a...
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
  /*
   * Widen @zone's span so that it covers [start_pfn, end_pfn) in addition
   * to whatever it covered before.  zone_start_pfn only ever moves down
   * and the end of the span only ever moves up.  The update is done under
   * zone_span_writelock().
   */
  static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
  			   unsigned long end_pfn)
  {
  	unsigned long span_end;

  	zone_span_writelock(zone);

  	/* end of the current span, computed before the start is moved */
  	span_end = zone->zone_start_pfn + zone->spanned_pages;
  	if (end_pfn > span_end)
  		span_end = end_pfn;

  	if (zone->zone_start_pfn > start_pfn)
  		zone->zone_start_pfn = start_pfn;

  	zone->spanned_pages = span_end - zone->zone_start_pfn;

  	zone_span_writeunlock(zone);
  }
  
  /*
   * Widen @pgdat's span so that it covers [start_pfn, end_pfn) in addition
   * to its previous range.  No lock is taken here; __add_zone() in this
   * file calls it between pgdat_resize_lock() and pgdat_resize_unlock().
   */
  static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
  			    unsigned long end_pfn)
  {
  	/* end of the current span, computed before the start is moved */
  	unsigned long span_end =
  		pgdat->node_start_pfn + pgdat->node_spanned_pages;

  	if (end_pfn > span_end)
  		span_end = end_pfn;

  	if (pgdat->node_start_pfn > start_pfn)
  		pgdat->node_start_pfn = start_pfn;

  	pgdat->node_spanned_pages = span_end - pgdat->node_start_pfn;
  }
31168481c   Al Viro   meminit section w...
190
  static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
3947be196   Dave Hansen   [PATCH] memory ho...
191
192
193
194
195
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nr_pages = PAGES_PER_SECTION;
  	int nid = pgdat->node_id;
  	int zone_type;
76cdd58e5   Heiko Carstens   memory_hotplug: a...
196
  	unsigned long flags;
3947be196   Dave Hansen   [PATCH] memory ho...
197
198
  
  	zone_type = zone - pgdat->node_zones;
76cdd58e5   Heiko Carstens   memory_hotplug: a...
199
200
201
202
203
204
205
206
207
208
209
210
211
  	if (!zone->wait_table) {
  		int ret;
  
  		ret = init_currently_empty_zone(zone, phys_start_pfn,
  						nr_pages, MEMMAP_HOTPLUG);
  		if (ret)
  			return ret;
  	}
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
  	grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
  			phys_start_pfn + nr_pages);
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
a2f3aa025   Dave Hansen   [PATCH] Fix spars...
212
213
  	memmap_init_zone(nr_pages, nid, zone_type,
  			 phys_start_pfn, MEMMAP_HOTPLUG);
718127cc3   Yasunori Goto   [PATCH] wait_tabl...
214
  	return 0;
3947be196   Dave Hansen   [PATCH] memory ho...
215
  }
c04fc586c   Gary Hade   mm: show node to ...
216
217
  static int __meminit __add_section(int nid, struct zone *zone,
  					unsigned long phys_start_pfn)
3947be196   Dave Hansen   [PATCH] memory ho...
218
  {
3947be196   Dave Hansen   [PATCH] memory ho...
219
  	int nr_pages = PAGES_PER_SECTION;
3947be196   Dave Hansen   [PATCH] memory ho...
220
  	int ret;
ebd15302d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
221
222
  	if (pfn_valid(phys_start_pfn))
  		return -EEXIST;
0b0acbec1   Dave Hansen   [PATCH] memory ho...
223
  	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
3947be196   Dave Hansen   [PATCH] memory ho...
224
225
226
  
  	if (ret < 0)
  		return ret;
718127cc3   Yasunori Goto   [PATCH] wait_tabl...
227
228
229
230
  	ret = __add_zone(zone, phys_start_pfn);
  
  	if (ret < 0)
  		return ret;
c04fc586c   Gary Hade   mm: show node to ...
231
  	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
3947be196   Dave Hansen   [PATCH] memory ho...
232
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
233
234
235
236
237
238
239
240
241
242
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
  /*
   * CONFIG_SPARSEMEM_VMEMMAP variant of __remove_section(): section
   * removal is not supported in this configuration, so @zone and @ms are
   * ignored and the call always fails with -EBUSY.
   */
  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
  	/*
  	 * XXX: Freeing the memmap with vmemmap is not implemented yet.
  	 *      This should be removed later.
  	 */
  	return -EBUSY;
  }
  #else
ea01ea937   Badari Pulavarty   hotplug memory re...
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  /*
   * Remove one memory section from @zone.
   *
   * Returns -EINVAL if @ms is not a valid section, the error from
   * unregister_memory_section() if that fails, and 0 once the section
   * has been handed to sparse_remove_one_section() under
   * pgdat_resize_lock().
   */
  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
  	struct pglist_data *node_data = zone->zone_pgdat;
  	unsigned long lock_flags;
  	int err;

  	if (!valid_section(ms))
  		return -EINVAL;

  	err = unregister_memory_section(ms);
  	if (!err) {
  		pgdat_resize_lock(node_data, &lock_flags);
  		sparse_remove_one_section(zone, ms);
  		pgdat_resize_unlock(node_data, &lock_flags);
  	}
  	return err;
  }
0c0a4a517   Yasunori Goto   memory hotplug: f...
261
  #endif
ea01ea937   Badari Pulavarty   hotplug memory re...
262

3947be196   Dave Hansen   [PATCH] memory ho...
263
264
265
266
267
268
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
c04fc586c   Gary Hade   mm: show node to ...
269
270
  int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
  			unsigned long nr_pages)
3947be196   Dave Hansen   [PATCH] memory ho...
271
272
273
  {
  	unsigned long i;
  	int err = 0;
6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
274
275
276
277
  	int start_sec, end_sec;
  	/* during initialize mem_map, align hot-added range to section */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
3947be196   Dave Hansen   [PATCH] memory ho...
278

6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
279
  	for (i = start_sec; i <= end_sec; i++) {
c04fc586c   Gary Hade   mm: show node to ...
280
  		err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
3947be196   Dave Hansen   [PATCH] memory ho...
281

6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
282
  		/*
183ff22bb   Simon Arlott   spelling fixes: mm/
283
  		 * EEXIST is finally dealt with by ioresource collision
6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
284
285
  		 * check. see add_memory() => register_memory_resource()
  		 * Warning will be printed if there is collision.
bed120c64   Joel H Schopp   [PATCH] spufs: fi...
286
287
  		 */
  		if (err && (err != -EEXIST))
3947be196   Dave Hansen   [PATCH] memory ho...
288
  			break;
6f712711d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
289
  		err = 0;
3947be196   Dave Hansen   [PATCH] memory ho...
290
291
292
293
  	}
  
  	return err;
  }
bed120c64   Joel H Schopp   [PATCH] spufs: fi...
294
  EXPORT_SYMBOL_GPL(__add_pages);
3947be196   Dave Hansen   [PATCH] memory ho...
295

ea01ea937   Badari Pulavarty   hotplug memory re...
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  /**
   * __remove_pages() - remove sections of pages from a zone
   * @zone: zone from which pages need to be removed
   * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
   * @nr_pages: number of pages to remove (must be multiple of section size)
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
   * sure that pages are marked reserved and zones are adjusted properly by
   * calling offline_pages().
   */
  int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
  	unsigned long i, ret = 0;
  	int sections_to_remove;
  
  	/*
  	 * We can only remove entire sections
  	 */
  	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
  	BUG_ON(nr_pages % PAGES_PER_SECTION);
ea01ea937   Badari Pulavarty   hotplug memory re...
318
319
320
  	sections_to_remove = nr_pages / PAGES_PER_SECTION;
  	for (i = 0; i < sections_to_remove; i++) {
  		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
de7f0cba9   Nathan Fontenot   memory hotplug: r...
321
322
  		release_mem_region(pfn << PAGE_SHIFT,
  				   PAGES_PER_SECTION << PAGE_SHIFT);
ea01ea937   Badari Pulavarty   hotplug memory re...
323
324
325
326
327
328
329
  		ret = __remove_section(zone, __pfn_to_section(pfn));
  		if (ret)
  			break;
  	}
  	return ret;
  }
  EXPORT_SYMBOL_GPL(__remove_pages);
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
330
331
  void online_page(struct page *page)
  {
4738e1b9c   Jan Beulich   memory hotplug: f...
332
  	unsigned long pfn = page_to_pfn(page);
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
333
  	totalram_pages++;
4738e1b9c   Jan Beulich   memory hotplug: f...
334
335
  	if (pfn >= num_physpages)
  		num_physpages = pfn + 1;
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
336
337
338
339
340
341
342
343
344
345
346
347
348
349
  
  #ifdef CONFIG_HIGHMEM
  	if (PageHighMem(page))
  		totalhigh_pages++;
  #endif
  
  #ifdef CONFIG_FLATMEM
  	max_mapnr = max(page_to_pfn(page), max_mapnr);
  #endif
  
  	ClearPageReserved(page);
  	init_page_count(page);
  	__free_page(page);
  }
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
350
351
  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
3947be196   Dave Hansen   [PATCH] memory ho...
352
353
  {
  	unsigned long i;
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
  	unsigned long onlined_pages = *(unsigned long *)arg;
  	struct page *page;
  	if (PageReserved(pfn_to_page(start_pfn)))
  		for (i = 0; i < nr_pages; i++) {
  			page = pfn_to_page(start_pfn + i);
  			online_page(page);
  			onlined_pages++;
  		}
  	*(unsigned long *)arg = onlined_pages;
  	return 0;
  }
  
  
  int online_pages(unsigned long pfn, unsigned long nr_pages)
  {
3947be196   Dave Hansen   [PATCH] memory ho...
369
370
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
371
  	int need_zonelists_rebuild = 0;
7b78d335a   Yasunori Goto   memory hotplug: r...
372
373
374
375
376
377
378
379
380
381
382
  	int nid;
  	int ret;
  	struct memory_notify arg;
  
  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
  	arg.status_change_nid = -1;
  
  	nid = page_to_nid(pfn_to_page(pfn));
  	if (node_present_pages(nid) == 0)
  		arg.status_change_nid = nid;
3947be196   Dave Hansen   [PATCH] memory ho...
383

7b78d335a   Yasunori Goto   memory hotplug: r...
384
385
386
387
388
389
  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret) {
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
390
391
392
  	/*
  	 * This doesn't need a lock to do pfn_to_page().
  	 * The section can't be removed here because of the
da19cbcf7   Daniel Walker   driver core: memo...
393
  	 * memory_block->state_mutex.
3947be196   Dave Hansen   [PATCH] memory ho...
394
395
  	 */
  	zone = page_zone(pfn_to_page(pfn));
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
396
397
398
399
400
  	/*
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
4eaf3f643   Haicheng Li   mem-hotplug: fix ...
401
  	mutex_lock(&zonelists_mutex);
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
402
403
  	if (!populated_zone(zone))
  		need_zonelists_rebuild = 1;
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
404
  	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
405
  		online_pages_range);
fd8a4221a   Geoff Levand   memory_hotplug: c...
406
  	if (ret) {
4eaf3f643   Haicheng Li   mem-hotplug: fix ...
407
  		mutex_unlock(&zonelists_mutex);
fd8a4221a   Geoff Levand   memory_hotplug: c...
408
409
410
411
412
413
  		printk(KERN_DEBUG "online_pages %lx at %lx failed
  ",
  			nr_pages, pfn);
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
414
  	zone->present_pages += onlined_pages;
f2937be58   Yasunori Goto   [PATCH] memory ho...
415
  	zone->zone_pgdat->node_present_pages += onlined_pages;
1f522509c   Haicheng Li   mem-hotplug: avoi...
416
417
418
419
  	if (need_zonelists_rebuild)
  		build_all_zonelists(zone);
  	else
  		zone_pcp_update(zone);
3947be196   Dave Hansen   [PATCH] memory ho...
420

4eaf3f643   Haicheng Li   mem-hotplug: fix ...
421
  	mutex_unlock(&zonelists_mutex);
bc75d33f0   Minchan Kim   page-allocator: c...
422
  	setup_per_zone_wmarks();
bce7394a3   Minchan Kim   page-allocator: r...
423
  	calculate_zone_inactive_ratio(zone);
7ea1530ab   Christoph Lameter   Memoryless nodes:...
424
425
426
427
  	if (onlined_pages) {
  		kswapd_run(zone_to_nid(zone));
  		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
  	}
61b13993a   Dave Hansen   [PATCH] memory ho...
428

1f522509c   Haicheng Li   mem-hotplug: avoi...
429
  	vm_total_pages = nr_free_pagecache_pages();
2f7f24eca   Kent Liu   memory-hotplug: d...
430

2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
431
  	writeback_set_ratelimit();
7b78d335a   Yasunori Goto   memory hotplug: r...
432
433
434
  
  	if (onlined_pages)
  		memory_notify(MEM_ONLINE, &arg);
3947be196   Dave Hansen   [PATCH] memory ho...
435
436
  	return 0;
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
437
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
438

e13193319   Hidetoshi Seto   mm/memory_hotplug...
439
440
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
  {
  	struct pglist_data *pgdat;
  	unsigned long zones_size[MAX_NR_ZONES] = {0};
  	unsigned long zholes_size[MAX_NR_ZONES] = {0};
  	unsigned long start_pfn = start >> PAGE_SHIFT;
  
  	pgdat = arch_alloc_nodedata(nid);
  	if (!pgdat)
  		return NULL;
  
  	arch_refresh_nodedata(nid, pgdat);
  
  	/* we can use NODE_DATA(nid) from here */
  
  	/* init node's zones as empty zones, we don't have any present pages.*/
9109fb7b3   Johannes Weiner   mm: drop unneeded...
456
  	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
457
458
459
460
461
462
463
464
465
466
  
  	return pgdat;
  }
  
  /*
   * Undo hotadd_new_pgdat(): clear the node data registered for @nid
   * (arch_refresh_nodedata() with NULL) and free @pgdat with
   * arch_free_nodedata().
   */
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
  	arch_free_nodedata(pgdat);
  }
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
467

cf23422b9   minskey guo   cpu/mem hotplug: ...
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
  /*
   * called by cpu_up() to online a node without onlined memory.
   *
   * Allocates and initialises a pgdat for @nid through hotadd_new_pgdat(),
   * marks the node online and registers it, all under lock_system_sleep().
   *
   * Returns 0 on success, or -ENOMEM if the pgdat could not be allocated.
   */
  int mem_online_node(int nid)
  {
  	pg_data_t	*pgdat;
  	int	ret;

  	lock_system_sleep();
  	pgdat = hotadd_new_pgdat(nid, 0);
  	/* hotadd_new_pgdat() returns NULL when the allocation fails */
  	if (!pgdat) {
  		ret = -ENOMEM;
  		goto out;
  	}
  	node_set_online(nid);
  	ret = register_one_node(nid);
  	/* as in add_memory(): a failed registration cannot be rolled back */
  	BUG_ON(ret);

  out:
  	unlock_system_sleep();
  	return ret;
  }
31168481c   Al Viro   meminit section w...
490
491
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  int __ref add_memory(int nid, u64 start, u64 size)
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
492
  {
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
493
494
  	pg_data_t *pgdat = NULL;
  	int new_pgdat = 0;
ebd15302d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
495
  	struct resource *res;
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
496
  	int ret;
6ad696d2c   Andi Kleen   mm: allow memory ...
497
  	lock_system_sleep();
ebd15302d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
498
  	res = register_memory_resource(start, size);
6ad696d2c   Andi Kleen   mm: allow memory ...
499
  	ret = -EEXIST;
ebd15302d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
500
  	if (!res)
6ad696d2c   Andi Kleen   mm: allow memory ...
501
  		goto out;
ebd15302d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
502

9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
503
504
  	if (!node_online(nid)) {
  		pgdat = hotadd_new_pgdat(nid, start);
6ad696d2c   Andi Kleen   mm: allow memory ...
505
  		ret = -ENOMEM;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
506
  		if (!pgdat)
6ad696d2c   Andi Kleen   mm: allow memory ...
507
  			goto out;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
508
  		new_pgdat = 1;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
509
  	}
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
510
511
  	/* call arch's memory hotadd */
  	ret = arch_add_memory(nid, start, size);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
512
513
  	if (ret < 0)
  		goto error;
0fc44159b   Yasunori Goto   [PATCH] Register ...
514
  	/* we online node here. we can't roll back from here. */
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
515
  	node_set_online(nid);
0fc44159b   Yasunori Goto   [PATCH] Register ...
516
517
518
519
520
521
522
523
524
  	if (new_pgdat) {
  		ret = register_one_node(nid);
  		/*
  		 * If sysfs file of new node can't create, cpu on the node
  		 * can't be hot-added. There is no rollback way now.
  		 * So, check by BUG_ON() to catch it reluctantly..
  		 */
  		BUG_ON(ret);
  	}
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
525
526
  	/* create new memmap entry */
  	firmware_map_add_hotplug(start, start + size, "System RAM");
6ad696d2c   Andi Kleen   mm: allow memory ...
527
  	goto out;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
528
529
530
531
  error:
  	/* rollback pgdat allocation and others */
  	if (new_pgdat)
  		rollback_node_hotadd(nid, pgdat);
ebd15302d   KAMEZAWA Hiroyuki   [PATCH] memory ho...
532
533
  	if (res)
  		release_memory_resource(res);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
534

6ad696d2c   Andi Kleen   mm: allow memory ...
535
536
  out:
  	unlock_system_sleep();
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
537
538
539
  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
540
541
542
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
543
544
545
546
547
548
549
550
551
552
553
554
555
556
   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
   * set and the size of the free page is given by page_order(). Using this,
   * the function determines if the pageblock contains only free pages.
   * Due to buddy constraints, a free page at least the size of a pageblock will
   * be located at the start of the pageblock.
   */
  static inline int pageblock_free(struct page *page)
  {
  	/* only a page sitting on the buddy lists carries a valid order */
  	if (!PageBuddy(page))
  		return 0;

  	/* free chunk starting here spans at least one whole pageblock? */
  	return page_order(page) >= pageblock_order;
  }
  
  /* Return the start of the next active pageblock after a given page */
  static struct page *next_active_pageblock(struct page *page)
  {
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
557
558
  	/* Ensure the starting page is pageblock-aligned */
  	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
559
  	/* If the entire pageblock is free, move to the end of free page */
0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
560
561
562
563
564
565
566
  	if (pageblock_free(page)) {
  		int order;
  		/* be careful. we don't have locks, page_order can be changed.*/
  		order = page_order(page);
  		if ((order < MAX_ORDER) && (order >= pageblock_order))
  			return page + (1 << order);
  	}
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
567

0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
568
  	return page + pageblock_nr_pages;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
  }
  
  /*
   * Checks if this range of memory is likely to be hot-removable.
   *
   * Only the first page of each pageblock is inspected; the walk advances
   * with next_active_pageblock().  Returns 1 when every inspected
   * pageblock looks removable, 0 as soon as one does not.
   */
  int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
  {
  	struct page *cur = pfn_to_page(start_pfn);
  	struct page *limit = cur + nr_pages;

  	while (cur < limit) {
  		int migratetype = get_pageblock_migratetype(cur);

  		/*
  		 * Only a pageblock that is MOVABLE or entirely free is
  		 * considered removable.
  		 */
  		if (!(migratetype == MIGRATE_MOVABLE || pageblock_free(cur)))
  			return 0;

  		/*
  		 * A pageblock whose first page is PageReserved is not
  		 * considered removable.
  		 */
  		if (PageReserved(cur))
  			return 0;

  		cur = next_active_pageblock(cur);
  	}

  	/* All pageblocks in the memory block are likely to be hot-removable */
  	return 1;
  }
  
  /*
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
   * Confirm that all pages in the range [start, end) belong to the same zone.
   */
  static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long base;
  	struct zone *seen = NULL;	/* zone of the first chunk sampled */
  	struct zone *cur;
  	int off;

  	/* sample one page per MAX_ORDER_NR_PAGES-sized chunk */
  	for (base = start_pfn; base < end_pfn; base += MAX_ORDER_NR_PAGES) {
  		/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  		for (off = 0; off < MAX_ORDER_NR_PAGES; off++)
  			if (pfn_valid_within(base + off))
  				break;

  		/* no valid pfn anywhere in this chunk: nothing to compare */
  		if (off == MAX_ORDER_NR_PAGES)
  			continue;

  		cur = page_zone(pfn_to_page(base + off));
  		if (seen && cur != seen)
  			return 0;
  		seen = cur;
  	}
  	return 1;
  }
  
  /*
   * Scanning pfn is much easier than scanning lru list.
   * Scan pfns in [start, end) and return the first one whose page has
   * PageLRU set, or 0 if there is none.
   *
   * NOTE(review): the pfn is an unsigned long but is returned through an
   * int, and 0 doubles as the "not found" value -- confirm callers are
   * fine with both.
   */
  int scan_lru_pages(unsigned long start, unsigned long end)
  {
  	unsigned long cur;

  	for (cur = start; cur < end; cur++) {
  		if (!pfn_valid(cur))
  			continue;
  		if (PageLRU(pfn_to_page(cur)))
  			return cur;
  	}
  	return 0;
  }
  
  static struct page *
3c1d43787   Hugh Dickins   mm: remove GFP_HI...
646
  hotremove_migrate_alloc(struct page *page, unsigned long private, int **x)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
647
  {
3c1d43787   Hugh Dickins   mm: remove GFP_HI...
648
649
  	/* This should be improooooved!! */
  	return alloc_page(GFP_HIGHUSER_MOVABLE);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
650
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
  #define NR_OFFLINE_AT_ONCE_PAGES	(256)
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
  	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
  	int not_managed = 0;
  	int ret = 0;
  	LIST_HEAD(source);
  
  	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
  		if (!page_count(page))
  			continue;
  		/*
  		 * We can skip free pages. And we can only deal with pages on
  		 * LRU.
  		 */
62695a84e   Nick Piggin   vmscan: move isol...
672
  		ret = isolate_lru_page(page);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
673
  		if (!ret) { /* Success */
62695a84e   Nick Piggin   vmscan: move isol...
674
  			list_add_tail(&page->lru, &source);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
675
  			move_pages--;
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
676
677
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
678
679
680
681
682
683
  		} else {
  			/* Because we don't hold the big zone->lock, we should
  			   check this again here. */
  			if (page_count(page))
  				not_managed++;
  #ifdef CONFIG_DEBUG_VM
718a38211   Wu Fengguang   mm: introduce dum...
684
685
686
687
  			printk(KERN_ALERT "removing pfn %lx from LRU failed
  ",
  			       pfn);
  			dump_page(page);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
688
689
690
691
692
693
694
695
696
697
698
699
700
  #endif
  		}
  	}
  	ret = -EBUSY;
  	if (not_managed) {
  		if (!list_empty(&source))
  			putback_lru_pages(&source);
  		goto out;
  	}
  	ret = 0;
  	if (list_empty(&source))
  		goto out;
  	/* this function returns # of failed pages */
62b61f611   Hugh Dickins   ksm: memory hotre...
701
  	ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
  
  out:
  	return ret;
  }
  
  /*
   * remove from free_area[] and mark all as Reserved.
   *
   * Callback handed to walk_system_ram_range() by offline_isolated_pages():
   * forwards the pfn range [start, start + nr_pages) to
   * __offline_isolated_pages().  @data is unused.  Always returns 0.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	unsigned long end = start + nr_pages;

  	__offline_isolated_pages(start, end);
  	return 0;
  }
  
  static void
  offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
721
  	walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
  				offline_isolated_pages_cb);
  }
  
  /*
   * Check all pages in range, recorded as memory resource, are isolated.
   *
   * Callback handed to walk_system_ram_range() by check_pages_isolated().
   * @data points to a long; when test_pages_isolated() returns 0 for
   * [start_pfn, start_pfn + nr_pages), @nr_pages is added to it.
   *
   * Returns the result of test_pages_isolated().
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
  	long *offlined = data;
  	int ret;

  	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
  	if (!ret)
  		*offlined += (long)nr_pages;
  	return ret;
  }
  
  static long
  check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
  {
  	long offlined = 0;
  	int ret;
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
746
  	ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
747
748
749
750
751
  			check_pages_isolated_cb);
  	if (ret < 0)
  		offlined = (long)ret;
  	return offlined;
  }
b4e655a4a   Andrew Morton   mm: memory_hotplu...
752
  static int offline_pages(unsigned long start_pfn,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
753
754
755
756
  		  unsigned long end_pfn, unsigned long timeout)
  {
  	unsigned long pfn, nr_pages, expire;
  	long offlined_pages;
7b78d335a   Yasunori Goto   memory hotplug: r...
757
  	int ret, drain, retry_max, node;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
758
  	struct zone *zone;
7b78d335a   Yasunori Goto   memory hotplug: r...
759
  	struct memory_notify arg;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
760
761
762
763
764
765
766
767
768
769
770
  
  	BUG_ON(start_pfn >= end_pfn);
  	/* at least, alignment against pageblock is necessary */
  	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	/* This makes hotplug much easier...and readable.
  	   we assume this for now. .*/
  	if (!test_pages_in_a_zone(start_pfn, end_pfn))
  		return -EINVAL;
7b78d335a   Yasunori Goto   memory hotplug: r...
771

6ad696d2c   Andi Kleen   mm: allow memory ...
772
  	lock_system_sleep();
7b78d335a   Yasunori Goto   memory hotplug: r...
773
774
775
  	zone = page_zone(pfn_to_page(start_pfn));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
776
777
778
  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn);
  	if (ret)
6ad696d2c   Andi Kleen   mm: allow memory ...
779
  		goto out;
7b78d335a   Yasunori Goto   memory hotplug: r...
780
781
782
783
784
785
786
787
788
789
790
  
  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	arg.status_change_nid = -1;
  	if (nr_pages >= node_present_pages(node))
  		arg.status_change_nid = node;
  
  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_removal;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
  	pfn = start_pfn;
  	expire = jiffies + timeout;
  	drain = 0;
  	retry_max = 5;
  repeat:
  	/* start memory hot removal */
  	ret = -EAGAIN;
  	if (time_after(jiffies, expire))
  		goto failed_removal;
  	ret = -EINTR;
  	if (signal_pending(current))
  		goto failed_removal;
  	ret = 0;
  	if (drain) {
  		lru_add_drain_all();
  		flush_scheduled_work();
  		cond_resched();
9f8f21725   Christoph Lameter   Page allocator: c...
808
  		drain_all_pages();
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
  	}
  
  	pfn = scan_lru_pages(start_pfn, end_pfn);
  	if (pfn) { /* We have page on LRU */
  		ret = do_migrate_range(pfn, end_pfn);
  		if (!ret) {
  			drain = 1;
  			goto repeat;
  		} else {
  			if (ret < 0)
  				if (--retry_max == 0)
  					goto failed_removal;
  			yield();
  			drain = 1;
  			goto repeat;
  		}
  	}
  	/* drain all zone's lru pagevec, this is asyncronous... */
  	lru_add_drain_all();
  	flush_scheduled_work();
  	yield();
  	/* drain pcp pages , this is synchrouns. */
9f8f21725   Christoph Lameter   Page allocator: c...
831
  	drain_all_pages();
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
832
833
834
835
836
837
838
839
840
841
842
  	/* check again */
  	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
  	if (offlined_pages < 0) {
  		ret = -EBUSY;
  		goto failed_removal;
  	}
  	printk(KERN_INFO "Offlined Pages %ld
  ", offlined_pages);
  	/* Ok, all of our target is islaoted.
  	   We cannot do rollback at this point. */
  	offline_isolated_pages(start_pfn, end_pfn);
dbc0e4cef   KAMEZAWA Hiroyuki   memory hotremove:...
843
844
  	/* reset pagetype flags and makes migrate type to be MOVABLE */
  	undo_isolate_page_range(start_pfn, end_pfn);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
845
  	/* removal success */
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
846
847
848
  	zone->present_pages -= offlined_pages;
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	totalram_pages -= offlined_pages;
7b78d335a   Yasunori Goto   memory hotplug: r...
849

bce7394a3   Minchan Kim   page-allocator: r...
850
851
  	setup_per_zone_wmarks();
  	calculate_zone_inactive_ratio(zone);
8fe23e057   David Rientjes   mm: clear node in...
852
853
854
855
  	if (!node_present_pages(node)) {
  		node_clear_state(node, N_HIGH_MEMORY);
  		kswapd_stop(node);
  	}
bce7394a3   Minchan Kim   page-allocator: r...
856

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
857
858
  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();
7b78d335a   Yasunori Goto   memory hotplug: r...
859
860
  
  	memory_notify(MEM_OFFLINE, &arg);
6ad696d2c   Andi Kleen   mm: allow memory ...
861
  	unlock_system_sleep();
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
862
863
864
865
866
867
  	return 0;
  
  failed_removal:
  	printk(KERN_INFO "memory offlining %lx to %lx failed
  ",
  		start_pfn, end_pfn);
7b78d335a   Yasunori Goto   memory hotplug: r...
868
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
869
870
  	/* pushback to free area */
  	undo_isolate_page_range(start_pfn, end_pfn);
7b78d335a   Yasunori Goto   memory hotplug: r...
871

6ad696d2c   Andi Kleen   mm: allow memory ...
872
873
  out:
  	unlock_system_sleep();
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
874
875
  	return ret;
  }
71088785c   Badari Pulavarty   mm: cleanup to ma...
876
877
878
879
880
881
882
883
884
  
  /*
   * Offline the memory described by @start and @size.
   *
   * Both values are converted with PFN_DOWN() (presumably a byte address and
   * a byte length -- confirm against callers); the resulting pfn range is
   * handed to offline_pages() with a timeout of 120 * HZ.
   * Returns whatever offline_pages() returns.
   */
  int remove_memory(u64 start, u64 size)
  {
  	unsigned long first_pfn = PFN_DOWN(start);
  	unsigned long pfn_limit = first_pfn + PFN_DOWN(size);

  	return offline_pages(first_pfn, pfn_limit, 120 * HZ);
  }
48e94196a   KAMEZAWA Hiroyuki   fix memory hot re...
885
886
887
888
889
  #else
  /*
   * Stub used in the #else arm of the CONFIG_MEMORY_HOTREMOVE conditional:
   * both arguments are ignored and -EINVAL is always returned.
   */
  int remove_memory(u64 start, u64 size)
  {
  	return -EINVAL;
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
890
  #endif /* CONFIG_MEMORY_HOTREMOVE */
71088785c   Badari Pulavarty   mm: cleanup to ma...
891
  EXPORT_SYMBOL_GPL(remove_memory);