Blame view
mm/memory_hotplug.c
14 KB
3947be196 [PATCH] memory ho... |
1 2 3 4 5 |
/* * linux/mm/memory_hotplug.c * * Copyright (C) */ |
3947be196 [PATCH] memory ho... |
6 7 8 9 10 11 12 13 14 |
#include <linux/stddef.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/interrupt.h> #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/compiler.h> #include <linux/module.h> #include <linux/pagevec.h> |
2d1d43f6a [PATCH] call mm/p... |
15 |
#include <linux/writeback.h> |
3947be196 [PATCH] memory ho... |
16 17 18 19 20 21 22 |
#include <linux/slab.h> #include <linux/sysctl.h> #include <linux/cpu.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> #include <linux/highmem.h> #include <linux/vmalloc.h> |
0a5470390 [PATCH] register ... |
23 |
#include <linux/ioport.h> |
38837fc75 [PATCH] cpuset: t... |
24 |
#include <linux/cpuset.h> |
0c0e61958 memory unplug: pa... |
25 26 27 |
#include <linux/delay.h> #include <linux/migrate.h> #include <linux/page-isolation.h> |
3947be196 [PATCH] memory ho... |
28 29 |
#include <asm/tlbflush.h> |
45e0b78b0 [PATCH] hot-add-m... |
30 31 32 33 34 35 36 37 38 39 |
/*
 * Add the range [start, start + size) to the iomem resource tree as
 * busy "System RAM".
 *
 * Returns the newly registered resource, or NULL when either the
 * descriptor cannot be allocated or request_resource() rejects the
 * range.  A non-NULL result is owned by the caller and is given back
 * with release_memory_resource().
 */
static struct resource *register_memory_resource(u64 start, u64 size)
{
	struct resource *res;

	res = kzalloc(sizeof(*res), GFP_KERNEL);
	/*
	 * Fail the hot-add instead of taking the machine down: callers
	 * already have to cope with a NULL return.
	 */
	if (!res)
		return NULL;

	res->name = "System RAM";
	res->start = start;
	res->end = start + size - 1;
	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;

	if (request_resource(&iomem_resource, res) < 0) {
		printk(KERN_WARNING
		       "System RAM resource %llx - %llx cannot be added\n",
		       (unsigned long long)res->start,
		       (unsigned long long)res->end);
		kfree(res);
		res = NULL;
	}

	return res;
}

/*
 * Undo register_memory_resource(): unregister @res from the resource
 * tree and free it.  A NULL @res is ignored.
 */
static void release_memory_resource(struct resource *res)
{
	if (!res)
		return;

	release_resource(res);
	kfree(res);
}
53947027a [PATCH] hot-add-m... |
59 |
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
718127cc3 [PATCH] wait_tabl... |
60 |
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) |
3947be196 [PATCH] memory ho... |
61 62 63 64 65 66 67 |
{ struct pglist_data *pgdat = zone->zone_pgdat; int nr_pages = PAGES_PER_SECTION; int nid = pgdat->node_id; int zone_type; zone_type = zone - pgdat->node_zones; |
13466c841 memory hotplug: f... |
68 |
if (!zone->wait_table) { |
718127cc3 [PATCH] wait_tabl... |
69 |
int ret = 0; |
a2f3aa025 [PATCH] Fix spars... |
70 71 |
ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages, MEMMAP_HOTPLUG); |
718127cc3 [PATCH] wait_tabl... |
72 73 74 |
if (ret < 0) return ret; } |
a2f3aa025 [PATCH] Fix spars... |
75 76 |
memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn, MEMMAP_HOTPLUG); |
718127cc3 [PATCH] wait_tabl... |
77 |
return 0; |
3947be196 [PATCH] memory ho... |
78 |
} |
3947be196 [PATCH] memory ho... |
79 80 |
static int __add_section(struct zone *zone, unsigned long phys_start_pfn) { |
3947be196 [PATCH] memory ho... |
81 |
int nr_pages = PAGES_PER_SECTION; |
3947be196 [PATCH] memory ho... |
82 |
int ret; |
ebd15302d [PATCH] memory ho... |
83 84 |
if (pfn_valid(phys_start_pfn)) return -EEXIST; |
0b0acbec1 [PATCH] memory ho... |
85 |
ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); |
3947be196 [PATCH] memory ho... |
86 87 88 |
if (ret < 0) return ret; |
718127cc3 [PATCH] wait_tabl... |
89 90 91 92 |
ret = __add_zone(zone, phys_start_pfn); if (ret < 0) return ret; |
3947be196 [PATCH] memory ho... |
93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
return register_new_memory(__pfn_to_section(phys_start_pfn)); } /* * Reasonably generic function for adding memory. It is * expected that archs that support memory hotplug will * call this function after deciding the zone to which to * add the new pages. */ int __add_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long nr_pages) { unsigned long i; int err = 0; |
6f712711d [PATCH] memory ho... |
107 108 109 110 |
int start_sec, end_sec; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); |
3947be196 [PATCH] memory ho... |
111 |
|
6f712711d [PATCH] memory ho... |
112 113 |
for (i = start_sec; i <= end_sec; i++) { err = __add_section(zone, i << PFN_SECTION_SHIFT); |
3947be196 [PATCH] memory ho... |
114 |
|
6f712711d [PATCH] memory ho... |
115 |
/* |
183ff22bb spelling fixes: mm/ |
116 |
* EEXIST is finally dealt with by ioresource collision |
6f712711d [PATCH] memory ho... |
117 118 |
* check. see add_memory() => register_memory_resource() * Warning will be printed if there is collision. |
bed120c64 [PATCH] spufs: fi... |
119 120 |
*/ if (err && (err != -EEXIST)) |
3947be196 [PATCH] memory ho... |
121 |
break; |
6f712711d [PATCH] memory ho... |
122 |
err = 0; |
3947be196 [PATCH] memory ho... |
123 124 125 126 |
} return err; } |
bed120c64 [PATCH] spufs: fi... |
127 |
EXPORT_SYMBOL_GPL(__add_pages); |
3947be196 [PATCH] memory ho... |
128 129 130 131 132 133 134 135 136 137 138 |
/*
 * Widen @zone so that its span covers [start_pfn, end_pfn) in addition
 * to what it already spans.  The update runs under the zone span
 * write lock.
 */
static void grow_zone_span(struct zone *zone,
			   unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long span_end;

	zone_span_writelock(zone);

	/* End of the span, taken before the start pfn is possibly lowered. */
	span_end = zone->zone_start_pfn + zone->spanned_pages;
	if (end_pfn > span_end)
		span_end = end_pfn;

	if (start_pfn < zone->zone_start_pfn)
		zone->zone_start_pfn = start_pfn;

	zone->spanned_pages = span_end - zone->zone_start_pfn;

	zone_span_writeunlock(zone);
}

/*
 * Widen the node described by @pgdat so that its span covers
 * [start_pfn, end_pfn) in addition to what it already spans.
 * No lock is taken here.
 */
static void grow_pgdat_span(struct pglist_data *pgdat,
			    unsigned long start_pfn, unsigned long end_pfn)
{
	/* End of the span, taken before the start pfn is possibly lowered. */
	unsigned long span_end = pgdat->node_start_pfn +
				 pgdat->node_spanned_pages;

	if (end_pfn > span_end)
		span_end = end_pfn;

	if (start_pfn < pgdat->node_start_pfn)
		pgdat->node_start_pfn = start_pfn;

	pgdat->node_spanned_pages = span_end - pgdat->node_start_pfn;
}
75884fb1c memory unplug: me... |
156 157 |
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, void *arg) |
3947be196 [PATCH] memory ho... |
158 159 |
{ unsigned long i; |
75884fb1c memory unplug: me... |
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
unsigned long onlined_pages = *(unsigned long *)arg; struct page *page; if (PageReserved(pfn_to_page(start_pfn))) for (i = 0; i < nr_pages; i++) { page = pfn_to_page(start_pfn + i); online_page(page); onlined_pages++; } *(unsigned long *)arg = onlined_pages; return 0; } int online_pages(unsigned long pfn, unsigned long nr_pages) { |
3947be196 [PATCH] memory ho... |
175 176 177 |
unsigned long flags; unsigned long onlined_pages = 0; struct zone *zone; |
6811378e7 [PATCH] wait_tabl... |
178 |
int need_zonelists_rebuild = 0; |
7b78d335a memory hotplug: r... |
179 180 181 182 183 184 185 186 187 188 189 |
int nid; int ret; struct memory_notify arg; arg.start_pfn = pfn; arg.nr_pages = nr_pages; arg.status_change_nid = -1; nid = page_to_nid(pfn_to_page(pfn)); if (node_present_pages(nid) == 0) arg.status_change_nid = nid; |
3947be196 [PATCH] memory ho... |
190 |
|
7b78d335a memory hotplug: r... |
191 192 193 194 195 196 |
ret = memory_notify(MEM_GOING_ONLINE, &arg); ret = notifier_to_errno(ret); if (ret) { memory_notify(MEM_CANCEL_ONLINE, &arg); return ret; } |
3947be196 [PATCH] memory ho... |
197 198 199 200 201 202 203 204 205 206 |
/* * This doesn't need a lock to do pfn_to_page(). * The section can't be removed here because of the * memory_block->state_sem. */ zone = page_zone(pfn_to_page(pfn)); pgdat_resize_lock(zone->zone_pgdat, &flags); grow_zone_span(zone, pfn, pfn + nr_pages); grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages); pgdat_resize_unlock(zone->zone_pgdat, &flags); |
6811378e7 [PATCH] wait_tabl... |
207 208 209 210 211 212 213 |
/* * If this zone is not populated, then it is not in zonelist. * This means the page allocator ignores this zone. * So, zonelist must be updated after online. */ if (!populated_zone(zone)) need_zonelists_rebuild = 1; |
75884fb1c memory unplug: me... |
214 215 |
walk_memory_resource(pfn, nr_pages, &onlined_pages, online_pages_range); |
3947be196 [PATCH] memory ho... |
216 |
zone->present_pages += onlined_pages; |
f2937be58 [PATCH] memory ho... |
217 |
zone->zone_pgdat->node_present_pages += onlined_pages; |
3947be196 [PATCH] memory ho... |
218 |
|
61b13993a [PATCH] memory ho... |
219 |
setup_per_zone_pages_min(); |
7ea1530ab Memoryless nodes:... |
220 221 222 223 |
if (onlined_pages) { kswapd_run(zone_to_nid(zone)); node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); } |
61b13993a [PATCH] memory ho... |
224 |
|
6811378e7 [PATCH] wait_tabl... |
225 226 |
if (need_zonelists_rebuild) build_all_zonelists(); |
5a4d43615 [PATCH] update vm... |
227 |
vm_total_pages = nr_free_pagecache_pages(); |
2d1d43f6a [PATCH] call mm/p... |
228 |
writeback_set_ratelimit(); |
7b78d335a memory hotplug: r... |
229 230 231 |
if (onlined_pages) memory_notify(MEM_ONLINE, &arg); |
3947be196 [PATCH] memory ho... |
232 233 |
return 0; } |
53947027a [PATCH] hot-add-m... |
234 |
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ |
bc02af93d [PATCH] pgdat all... |
235 |
|
9af3c2dea [PATCH] pgdat all... |
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 |
/*
 * Allocate and initialise the node data for hot-added node @nid whose
 * memory starts at physical address @start.
 *
 * Returns the new pgdat, or NULL if arch_alloc_nodedata() fails.
 */
static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
	unsigned long no_pages[MAX_NR_ZONES] = {0};
	unsigned long no_holes[MAX_NR_ZONES] = {0};
	unsigned long first_pfn = start >> PAGE_SHIFT;
	struct pglist_data *node;

	node = arch_alloc_nodedata(nid);
	if (!node)
		return NULL;

	/* NODE_DATA(nid) can be used once this has run */
	arch_refresh_nodedata(nid, node);

	/* every zone starts out empty: the node has no present pages yet */
	free_area_init_node(nid, node, no_pages, first_pfn, no_holes);

	return node;
}

/*
 * Undo hotadd_new_pgdat(): clear the node data registered for @nid and
 * free @pgdat.
 */
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
	arch_refresh_nodedata(nid, NULL);
	arch_free_nodedata(pgdat);
}
0a5470390 [PATCH] register ... |
263 |
|
bc02af93d [PATCH] pgdat all... |
264 265 |
/*
 * Hot-add the physical range [start, start + size) to node @nid.
 *
 * The range is first registered as a "System RAM" resource.  If the
 * node is not online yet, node data is allocated for it.  The
 * architecture hook then adds the memory, after which the node is
 * marked online and, if it is new, registered with register_one_node().
 *
 * Returns 0 on success, -EEXIST if the resource cannot be registered,
 * -ENOMEM if node data cannot be allocated, or the negative value
 * returned by arch_add_memory().  On every failure after the resource
 * was registered, the resource (and any freshly allocated node data)
 * is released again.
 */
int add_memory(int nid, u64 start, u64 size)
{
	pg_data_t *pgdat = NULL;
	int new_pgdat = 0;
	struct resource *res;
	int ret;

	res = register_memory_resource(start, size);
	if (!res)
		return -EEXIST;

	if (!node_online(nid)) {
		pgdat = hotadd_new_pgdat(nid, start);
		if (!pgdat) {
			/* go through the error path so that res is not leaked */
			ret = -ENOMEM;
			goto error;
		}
		new_pgdat = 1;
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size);
	if (ret < 0)
		goto error;

	/* we online node here. we can't roll back from here. */
	node_set_online(nid);

	cpuset_track_online_nodes();

	if (new_pgdat) {
		ret = register_one_node(nid);
		/*
		 * If the sysfs file of the new node can't be created, cpus
		 * on the node can't be hot-added.  There is no way to roll
		 * back now, so catch it, reluctantly, with BUG_ON().
		 */
		BUG_ON(ret);
	}

	return ret;

error:
	/* rollback pgdat allocation and others */
	if (new_pgdat)
		rollback_node_hotadd(nid, pgdat);
	release_memory_resource(res);

	return ret;
}
EXPORT_SYMBOL_GPL(add_memory);
0c0e61958 memory unplug: pa... |
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 |
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * Confirm that all pages in the range [start_pfn, end_pfn) belong to
 * the same zone.
 *
 * The range is walked in MAX_ORDER_NR_PAGES steps and the first valid
 * page of each step is the one whose zone is compared.
 *
 * Returns 1 if no zone mismatch is found, 0 otherwise.
 */
static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct zone *zone = NULL;
	struct page *page;
	int i;

	for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) {
		i = 0;
		/* This is just a CONFIG_HOLES_IN_ZONE check. */
		while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
			i++;
		if (i == MAX_ORDER_NR_PAGES)
			continue;
		page = pfn_to_page(pfn + i);
		if (zone && page_zone(page) != zone)
			return 0;
		zone = page_zone(page);
	}
	return 1;
}

/*
 * Scanning pfns is much easier than scanning the LRU list.
 * Scan pfns in [start, end) and return the first one whose page is on
 * an LRU, or 0 if there is none.
 *
 * NOTE(review): the pfn is returned through an int, so a pfn that does
 * not fit in an int would be truncated.  The return type is left alone
 * here because the function is not static - confirm there are no
 * outside users before widening it to unsigned long.
 */
int scan_lru_pages(unsigned long start, unsigned long end)
{
	unsigned long pfn;
	struct page *page;

	for (pfn = start; pfn < end; pfn++) {
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageLRU(page))
				return pfn;
		}
	}
	return 0;
}

/*
 * Allocation callback handed to migrate_pages(): supplies the page
 * that will receive the contents of @page.  @private and @x are unused.
 */
static struct page *
hotremove_migrate_alloc(struct page *page,
			unsigned long private,
			int **x)
{
	/* This should be improved! */
	return alloc_page(GFP_HIGHUSER_PAGECACHE);
}

/* Upper bound on the pages isolated by one do_migrate_range() call. */
#define NR_OFFLINE_AT_ONCE_PAGES	(256)

/*
 * Isolate up to NR_OFFLINE_AT_ONCE_PAGES in-use pages found in
 * [start_pfn, end_pfn) from the LRU and migrate them away.
 *
 * Returns 0 if there was nothing to migrate, -EBUSY if a page that is
 * still in use could not be isolated (anything already isolated is put
 * back), and otherwise the return value of migrate_pages().
 */
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct page *page;
	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
	int not_managed = 0;
	int ret = 0;
	LIST_HEAD(source);

	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		if (!page_count(page))
			continue;
		/*
		 * We can skip free pages. And we can only deal with pages on
		 * LRU.
		 */
		ret = isolate_lru_page(page, &source);
		if (!ret) { /* Success */
			move_pages--;
		} else {
			/*
			 * Because we don't hold the big zone->lock, we should
			 * check this again here.
			 */
			if (page_count(page))
				not_managed++;
#ifdef CONFIG_DEBUG_VM
			printk(KERN_INFO "removing from LRU failed"
			       " %lx/%d/%lx\n",
			       pfn, page_count(page), page->flags);
#endif
		}
	}
	ret = -EBUSY;
	if (not_managed) {
		if (!list_empty(&source))
			putback_lru_pages(&source);
		goto out;
	}
	ret = 0;
	if (list_empty(&source))
		goto out;
	/* this function returns # of failed pages */
	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);

out:
	return ret;
}

/*
 * walk_memory_resource() callback: remove the range from free_area[]
 * and mark all of its pages Reserved.
 */
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
			  void *data)
{
	__offline_isolated_pages(start, start + nr_pages);
	return 0;
}

/*
 * Run offline_isolated_pages_cb() over every memory resource found in
 * [start_pfn, end_pfn).
 */
static void
offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
	walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
			     offline_isolated_pages_cb);
}

/*
 * walk_memory_resource() callback: check that all pages of the range
 * are isolated.  When test_pages_isolated() returns 0, @nr_pages is
 * added to the long counter behind @data.  Returns the value of
 * test_pages_isolated().
 */
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
			void *data)
{
	long *offlined = data;
	int ret;

	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
	if (!ret)
		*offlined += nr_pages;
	return ret;
}

/*
 * Check that all pages in [start_pfn, end_pfn) that are recorded as
 * memory resource are isolated.
 *
 * Returns the number of pages counted by check_pages_isolated_cb(), or
 * the negative value returned by walk_memory_resource().
 */
static long
check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
{
	long offlined = 0;
	int ret;

	ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
				   check_pages_isolated_cb);
	if (ret < 0)
		offlined = (long)ret;
	return offlined;
}

extern void drain_all_local_pages(void);

/*
 * Take the pages [start_pfn, end_pfn) offline.
 *
 * Both ends must be aligned to pageblock_nr_pages and the whole range
 * must lie in one zone, otherwise -EINVAL is returned.  The range is
 * isolated, notifiers are told MEM_GOING_OFFLINE, and pages found on
 * the LRU are migrated away in a retry loop.  The loop gives up with
 * -EAGAIN once @timeout jiffies have passed, with -EINTR when a signal
 * is pending, or with the migration error after it has failed five
 * times.  If not all pages end up isolated, -EBUSY is returned.
 *
 * On failure after isolation MEM_CANCEL_OFFLINE is sent and the
 * isolation is undone.  On success the page counters are reduced,
 * MEM_OFFLINE is sent and 0 is returned.
 */
int offline_pages(unsigned long start_pfn,
		  unsigned long end_pfn, unsigned long timeout)
{
	unsigned long pfn, nr_pages, expire;
	long offlined_pages;
	int ret, drain, retry_max, node;
	struct zone *zone;
	struct memory_notify arg;

	BUG_ON(start_pfn >= end_pfn);
	/* at least, alignment against pageblock is necessary */
	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
		return -EINVAL;
	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
		return -EINVAL;
	/*
	 * This makes hotplug much easier... and more readable.
	 * We assume this for now.
	 */
	if (!test_pages_in_a_zone(start_pfn, end_pfn))
		return -EINVAL;

	zone = page_zone(pfn_to_page(start_pfn));
	node = zone_to_nid(zone);
	nr_pages = end_pfn - start_pfn;

	/* set above range as isolated */
	ret = start_isolate_page_range(start_pfn, end_pfn);
	if (ret)
		return ret;

	arg.start_pfn = start_pfn;
	arg.nr_pages = nr_pages;
	arg.status_change_nid = -1;
	if (nr_pages >= node_present_pages(node))
		arg.status_change_nid = node;

	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
	ret = notifier_to_errno(ret);
	if (ret)
		goto failed_removal;

	pfn = start_pfn;
	expire = jiffies + timeout;
	drain = 0;
	retry_max = 5;
repeat:
	/* start memory hot removal */
	ret = -EAGAIN;
	if (time_after(jiffies, expire))
		goto failed_removal;
	ret = -EINTR;
	if (signal_pending(current))
		goto failed_removal;
	ret = 0;
	if (drain) {
		lru_add_drain_all();
		flush_scheduled_work();
		cond_resched();
		drain_all_local_pages();
	}

	pfn = scan_lru_pages(start_pfn, end_pfn);
	if (pfn) { /* We have page on LRU */
		ret = do_migrate_range(pfn, end_pfn);
		if (!ret) {
			drain = 1;
			goto repeat;
		} else {
			if (ret < 0)
				if (--retry_max == 0)
					goto failed_removal;
			yield();
			drain = 1;
			goto repeat;
		}
	}
	/* drain all zones' lru pagevecs, this is asynchronous... */
	lru_add_drain_all();
	flush_scheduled_work();
	yield();
	/* drain pcp pages, this is synchronous. */
	drain_all_local_pages();
	/* check again */
	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
	if (offlined_pages < 0) {
		ret = -EBUSY;
		goto failed_removal;
	}
	printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
	/*
	 * Ok, all of our target is isolated.
	 * We cannot do rollback at this point.
	 */
	offline_isolated_pages(start_pfn, end_pfn);
	/* reset pagetype flags and makes migrate type to be MOVABLE */
	undo_isolate_page_range(start_pfn, end_pfn);
	/* removal success */
	zone->present_pages -= offlined_pages;
	zone->zone_pgdat->node_present_pages -= offlined_pages;
	totalram_pages -= offlined_pages;
	num_physpages -= offlined_pages;

	vm_total_pages = nr_free_pagecache_pages();
	writeback_set_ratelimit();

	memory_notify(MEM_OFFLINE, &arg);
	return 0;

failed_removal:
	printk(KERN_INFO "memory offlining %lx to %lx failed\n",
	       start_pfn, end_pfn);
	memory_notify(MEM_CANCEL_OFFLINE, &arg);
	/* pushback to free area */
	undo_isolate_page_range(start_pfn, end_pfn);

	return ret;
}
48e94196a fix memory hot re... |
578 579 580 581 582 583 |
#else int remove_memory(u64 start, u64 size) { return -EINVAL; } EXPORT_SYMBOL_GPL(remove_memory); |
0c0e61958 memory unplug: pa... |
584 |
#endif /* CONFIG_MEMORY_HOTREMOVE */ |