Blame view

mm/memory_hotplug.c 14 KB
  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
  #include <linux/stddef.h>
  #include <linux/mm.h>
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
  #include <linux/bootmem.h>
  #include <linux/compiler.h>
  #include <linux/module.h>
  #include <linux/pagevec.h>
  #include <linux/writeback.h>
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
  #include <linux/ioport.h>
  #include <linux/cpuset.h>
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
  
  #include <asm/tlbflush.h>
  /* add this memory to iomem resource */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res;
  	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
  	BUG_ON(!res);
  
  	res->name = "System RAM";
  	res->start = start;
  	res->end = start + size - 1;
  	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
  	if (request_resource(&iomem_resource, res) < 0) {
  		printk("System RAM resource %llx - %llx cannot be added
  ",
  		(unsigned long long)res->start, (unsigned long long)res->end);
  		kfree(res);
  		res = NULL;
  	}
  	return res;
  }
  
  static void release_memory_resource(struct resource *res)
  {
  	if (!res)
  		return;
  	release_resource(res);
  	kfree(res);
  	return;
  }
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nr_pages = PAGES_PER_SECTION;
  	int nid = pgdat->node_id;
  	int zone_type;
  
  	zone_type = zone - pgdat->node_zones;
  	if (!zone->wait_table) {
  		int ret = 0;
  		ret = init_currently_empty_zone(zone, phys_start_pfn,
  						nr_pages, MEMMAP_HOTPLUG);
  		if (ret < 0)
  			return ret;
  	}
  	memmap_init_zone(nr_pages, nid, zone_type,
  			 phys_start_pfn, MEMMAP_HOTPLUG);
  	return 0;
  }
  static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
  {
  	int nr_pages = PAGES_PER_SECTION;
  	int ret;
  	if (pfn_valid(phys_start_pfn))
  		return -EEXIST;
  	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
  
  	if (ret < 0)
  		return ret;
  	ret = __add_zone(zone, phys_start_pfn);
  
  	if (ret < 0)
  		return ret;
  	return register_new_memory(__pfn_to_section(phys_start_pfn));
  }
  
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
  int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
  	unsigned long i;
  	int err = 0;
  	int start_sec, end_sec;
  	/* During mem_map initialization, align the hot-added range to sections. */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);

  	for (i = start_sec; i <= end_sec; i++) {
  		err = __add_section(zone, i << PFN_SECTION_SHIFT);

  		/*
  		 * EEXIST is finally dealt with by ioresource collision
  		 * check. See add_memory() => register_memory_resource().
  		 * A warning will be printed if there is a collision.
  		 */
  		if (err && (err != -EEXIST))
  			break;
  		err = 0;
  	}
  
  	return err;
  }
  EXPORT_SYMBOL_GPL(__add_pages);
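  
  /*
   * Illustrative sketch (not part of this file): an architecture that
   * supports memory hotplug is expected to pick a target zone and then
   * call __add_pages() from its arch_add_memory().  The zone choice
   * (ZONE_NORMAL) and the surrounding arch code are assumptions made
   * only for this example.
   */
  #if 0	/* example only */
  int arch_add_memory(int nid, u64 start, u64 size)
  {
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
  	unsigned long start_pfn = start >> PAGE_SHIFT;
  	unsigned long nr_pages = size >> PAGE_SHIFT;
  
  	/* arch-specific mapping setup would happen here */
  	return __add_pages(zone, start_pfn, nr_pages);
  }
  #endif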
  
  static void grow_zone_span(struct zone *zone,
  		unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long old_zone_end_pfn;
  
  	zone_span_writelock(zone);
  
  	old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
  	if (start_pfn < zone->zone_start_pfn)
  		zone->zone_start_pfn = start_pfn;
  	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
  				zone->zone_start_pfn;
  
  	zone_span_writeunlock(zone);
  }
  
  static void grow_pgdat_span(struct pglist_data *pgdat,
  		unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long old_pgdat_end_pfn =
  		pgdat->node_start_pfn + pgdat->node_spanned_pages;
  
  	if (start_pfn < pgdat->node_start_pfn)
  		pgdat->node_start_pfn = start_pfn;
  	pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
  					pgdat->node_start_pfn;
  }
  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
  {
  	unsigned long i;
  	unsigned long onlined_pages = *(unsigned long *)arg;
  	struct page *page;
  	if (PageReserved(pfn_to_page(start_pfn)))
  		for (i = 0; i < nr_pages; i++) {
  			page = pfn_to_page(start_pfn + i);
  			online_page(page);
  			onlined_pages++;
  		}
  	*(unsigned long *)arg = onlined_pages;
  	return 0;
  }
  
  
  int online_pages(unsigned long pfn, unsigned long nr_pages)
  {
  	unsigned long flags;
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
  	int need_zonelists_rebuild = 0;
  	int nid;
  	int ret;
  	struct memory_notify arg;
  
  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
  	arg.status_change_nid = -1;
  
  	nid = page_to_nid(pfn_to_page(pfn));
  	if (node_present_pages(nid) == 0)
  		arg.status_change_nid = nid;

  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret) {
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
  	/*
  	 * This doesn't need a lock to do pfn_to_page().
  	 * The section can't be removed here because of the
  	 * memory_block->state_sem.
  	 */
  	zone = page_zone(pfn_to_page(pfn));
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	grow_zone_span(zone, pfn, pfn + nr_pages);
  	grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
  	/*
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
  	if (!populated_zone(zone))
  		need_zonelists_rebuild = 1;
  	walk_memory_resource(pfn, nr_pages, &onlined_pages,
  		online_pages_range);
  	zone->present_pages += onlined_pages;
  	zone->zone_pgdat->node_present_pages += onlined_pages;

  	setup_per_zone_pages_min();
  	if (onlined_pages) {
  		kswapd_run(zone_to_nid(zone));
  		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
  	}

  	if (need_zonelists_rebuild)
  		build_all_zonelists();
  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();
  
  	if (onlined_pages)
  		memory_notify(MEM_ONLINE, &arg);
  	return 0;
  }
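  
  /*
   * Illustrative sketch (not part of this file): the memory sysfs layer
   * typically onlines one memory section at a time, roughly as in the
   * hypothetical helper below.  The helper name and the section-to-pfn
   * conversion are assumptions for illustration.
   */
  #if 0	/* example only */
  static int example_online_section(unsigned long section_nr)
  {
  	unsigned long start_pfn = section_nr << PFN_SECTION_SHIFT;
  
  	return online_pages(start_pfn, PAGES_PER_SECTION);
  }
  #endif
  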
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */

  static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
  {
  	struct pglist_data *pgdat;
  	unsigned long zones_size[MAX_NR_ZONES] = {0};
  	unsigned long zholes_size[MAX_NR_ZONES] = {0};
  	unsigned long start_pfn = start >> PAGE_SHIFT;
  
  	pgdat = arch_alloc_nodedata(nid);
  	if (!pgdat)
  		return NULL;
  
  	arch_refresh_nodedata(nid, pgdat);
  
  	/* we can use NODE_DATA(nid) from here */
  
  	/* init node's zones as empty zones, we don't have any present pages.*/
  	free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
  
  	return pgdat;
  }
  
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
  	arch_free_nodedata(pgdat);
  	return;
  }

  int add_memory(int nid, u64 start, u64 size)
  {
  	pg_data_t *pgdat = NULL;
  	int new_pgdat = 0;
  	struct resource *res;
  	int ret;
  	res = register_memory_resource(start, size);
  	if (!res)
  		return -EEXIST;
  	if (!node_online(nid)) {
  		pgdat = hotadd_new_pgdat(nid, start);
  		if (!pgdat)
  			return -ENOMEM;
  		new_pgdat = 1;
  	}
  	/* call arch's memory hotadd */
  	ret = arch_add_memory(nid, start, size);
  	if (ret < 0)
  		goto error;
  	/* We online the node here. We can't roll back from here. */
  	node_set_online(nid);
  	cpuset_track_online_nodes();
  	if (new_pgdat) {
  		ret = register_one_node(nid);
  		/*
  		 * If the sysfs file for the new node can't be created, CPUs
  		 * on the node can't be hot-added. There is no way to roll
  		 * back now, so check with BUG_ON() to catch it, reluctantly.
  		 */
  		BUG_ON(ret);
  	}
  	return ret;
  error:
  	/* rollback pgdat allocation and others */
  	if (new_pgdat)
  		rollback_node_hotadd(nid, pgdat);
  	if (res)
  		release_memory_resource(res);

  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
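  
  /*
   * Illustrative sketch (not part of this file): a hot-add path such as
   * an ACPI memory-device driver or a "probe" interface would call
   * add_memory() with the node id and the physical range it discovered.
   * The helper name and the one-section size are assumptions made only
   * for this example.
   */
  #if 0	/* example only */
  static int example_probe_memory(int nid, u64 phys_addr)
  {
  	u64 size = (u64)PAGES_PER_SECTION << PAGE_SHIFT;
  
  	return add_memory(nid, phys_addr, size);
  }
  #endif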
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
   * Confirm that all pages in the range [start, end) belong to the same zone.
   */
  static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct zone *zone = NULL;
  	struct page *page;
  	int i;
  	for (pfn = start_pfn;
  	     pfn < end_pfn;
  	     pfn += MAX_ORDER_NR_PAGES) {
  		i = 0;
  		/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  		while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
  			i++;
  		if (i == MAX_ORDER_NR_PAGES)
  			continue;
  		page = pfn_to_page(pfn + i);
  		if (zone && page_zone(page) != zone)
  			return 0;
  		zone = page_zone(page);
  	}
  	return 1;
  }
  
  /*
   * Scanning pfns is much easier than scanning the LRU list.
   * Scan pfns from start to end and find the first LRU page.
   */
  int scan_lru_pages(unsigned long start, unsigned long end)
  {
  	unsigned long pfn;
  	struct page *page;
  	for (pfn = start; pfn < end; pfn++) {
  		if (pfn_valid(pfn)) {
  			page = pfn_to_page(pfn);
  			if (PageLRU(page))
  				return pfn;
  		}
  	}
  	return 0;
  }
  
  static struct page *
  hotremove_migrate_alloc(struct page *page,
  			unsigned long private,
  			int **x)
  {
  	/* This should be improoooooved!! */
  	return alloc_page(GFP_HIGHUSER_PAGECACHE);
  }
  
  
  #define NR_OFFLINE_AT_ONCE_PAGES	(256)
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
  	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
  	int not_managed = 0;
  	int ret = 0;
  	LIST_HEAD(source);
  
  	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
  		if (!page_count(page))
  			continue;
  		/*
  		 * We can skip free pages. And we can only deal with pages on
  		 * LRU.
  		 */
  		ret = isolate_lru_page(page, &source);
  		if (!ret) { /* Success */
  			move_pages--;
  		} else {
  			/* Because we don't have a big zone->lock, we should
  			   check this again here. */
  			if (page_count(page))
  				not_managed++;
  #ifdef CONFIG_DEBUG_VM
  			printk(KERN_INFO "removing from LRU failed"
  					 " %lx/%d/%lx
  ",
  				pfn, page_count(page), page->flags);
  #endif
  		}
  	}
  	ret = -EBUSY;
  	if (not_managed) {
  		if (!list_empty(&source))
  			putback_lru_pages(&source);
  		goto out;
  	}
  	ret = 0;
  	if (list_empty(&source))
  		goto out;
  	/* this function returns # of failed pages */
  	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);
  
  out:
  	return ret;
  }
  
  /*
   * remove from free_area[] and mark all as Reserved.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	__offline_isolated_pages(start, start + nr_pages);
  	return 0;
  }
  
  static void
  offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
  	walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
  				offline_isolated_pages_cb);
  }
  
  /*
   * Check that all pages in the range, recorded as a memory resource, are isolated.
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
  	int ret;
  	long offlined = *(long *)data;
  	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
  	offlined = nr_pages;
  	if (!ret)
  		*(long *)data += offlined;
  	return ret;
  }
  
  static long
  check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
  {
  	long offlined = 0;
  	int ret;
  
  	ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
  			check_pages_isolated_cb);
  	if (ret < 0)
  		offlined = (long)ret;
  	return offlined;
  }
  
  extern void drain_all_local_pages(void);
  
  int offline_pages(unsigned long start_pfn,
  		  unsigned long end_pfn, unsigned long timeout)
  {
  	unsigned long pfn, nr_pages, expire;
  	long offlined_pages;
  	int ret, drain, retry_max, node;
  	struct zone *zone;
  	struct memory_notify arg;
  
  	BUG_ON(start_pfn >= end_pfn);
  	/* at least, alignment against pageblock is necessary */
  	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	/* This makes hotplug much easier...and readable.
  	   We assume this for now. */
  	if (!test_pages_in_a_zone(start_pfn, end_pfn))
  		return -EINVAL;
  
  	zone = page_zone(pfn_to_page(start_pfn));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;
  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn);
  	if (ret)
  		return ret;
  
  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	arg.status_change_nid = -1;
  	if (nr_pages >= node_present_pages(node))
  		arg.status_change_nid = node;
  
  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_removal;
  	pfn = start_pfn;
  	expire = jiffies + timeout;
  	drain = 0;
  	retry_max = 5;
  repeat:
  	/* start memory hot removal */
  	ret = -EAGAIN;
  	if (time_after(jiffies, expire))
  		goto failed_removal;
  	ret = -EINTR;
  	if (signal_pending(current))
  		goto failed_removal;
  	ret = 0;
  	if (drain) {
  		lru_add_drain_all();
  		flush_scheduled_work();
  		cond_resched();
  		drain_all_local_pages();
  	}
  
  	pfn = scan_lru_pages(start_pfn, end_pfn);
  	if (pfn) { /* We have page on LRU */
  		ret = do_migrate_range(pfn, end_pfn);
  		if (!ret) {
  			drain = 1;
  			goto repeat;
  		} else {
  			if (ret < 0)
  				if (--retry_max == 0)
  					goto failed_removal;
  			yield();
  			drain = 1;
  			goto repeat;
  		}
  	}
  	/* drain all zones' LRU pagevecs; this is asynchronous... */
  	lru_add_drain_all();
  	flush_scheduled_work();
  	yield();
  	/* drain pcp pages; this is synchronous. */
  	drain_all_local_pages();
  	/* check again */
  	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
  	if (offlined_pages < 0) {
  		ret = -EBUSY;
  		goto failed_removal;
  	}
  	printk(KERN_INFO "Offlined Pages %ld
  ", offlined_pages);
  	/* Ok, all of our target is isolated.
  	   We cannot do rollback at this point. */
  	offline_isolated_pages(start_pfn, end_pfn);
  	/* reset pagetype flags and make the migratetype MOVABLE */
  	undo_isolate_page_range(start_pfn, end_pfn);
  	/* removal success */
  	zone->present_pages -= offlined_pages;
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	totalram_pages -= offlined_pages;
  	num_physpages -= offlined_pages;

  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();
  
  	memory_notify(MEM_OFFLINE, &arg);
  	return 0;
  
  failed_removal:
  	printk(KERN_INFO "memory offlining %lx to %lx failed
  ",
  		start_pfn, end_pfn);
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
  	/* pushback to free area */
  	undo_isolate_page_range(start_pfn, end_pfn);

  	return ret;
  }
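  
  /*
   * Illustrative sketch (not part of this file): a remove_memory()-style
   * wrapper would translate a physical range into pfns and hand it to
   * offline_pages() together with a timeout in jiffies.  The helper name
   * and the 120*HZ timeout are assumptions made only for this example.
   */
  #if 0	/* example only */
  static int example_remove_memory(u64 start, u64 size)
  {
  	unsigned long start_pfn = start >> PAGE_SHIFT;
  	unsigned long end_pfn = start_pfn + (size >> PAGE_SHIFT);
  
  	return offline_pages(start_pfn, end_pfn, 120 * HZ);
  }
  #endif
  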
  #else
  int remove_memory(u64 start, u64 size)
  {
  	return -EINVAL;
  }
  EXPORT_SYMBOL_GPL(remove_memory);
  #endif /* CONFIG_MEMORY_HOTREMOVE */