  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
  #include <linux/stddef.h>
  #include <linux/mm.h>
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
  #include <linux/compiler.h>
  #include <linux/export.h>
  #include <linux/pagevec.h>
  #include <linux/writeback.h>
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
  #include <linux/ioport.h>
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
  #include <linux/pfn.h>
  #include <linux/suspend.h>
  #include <linux/mm_inline.h>
  #include <linux/firmware-map.h>
  #include <linux/stop_machine.h>
  #include <linux/hugetlb.h>
  #include <linux/memblock.h>
  #include <linux/bootmem.h>
  
  #include <asm/tlbflush.h>
  #include "internal.h"
  /*
   * online_page_callback contains pointer to current page onlining function.
   * Initially it is generic_online_page(). If it is required it could be
   * changed by calling set_online_page_callback() for callback registration
   * and restore_online_page_callback() for generic callback restore.
   */
  
  static void generic_online_page(struct page *page);
  
  static online_page_callback_t online_page_callback = generic_online_page;
  static DEFINE_MUTEX(online_page_callback_lock);
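
/*
 * Example (sketch): a driver that wants to intercept page onlining, such
 * as a ballooning driver, can register its own callback:
 *
 *	set_online_page_callback(&my_online_page);
 *	...
 *	restore_online_page_callback(&my_online_page);
 *
 * where my_online_page() is a hypothetical online_page_callback_t.
 */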

  /* The same as the cpu_hotplug lock, but for memory hotplug. */
  static struct {
  	struct task_struct *active_writer;
  	struct mutex lock; /* Synchronizes accesses to refcount, */
  	/*
  	 * Also blocks the new readers during
  	 * an ongoing mem hotplug operation.
  	 */
  	int refcount;
  
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  	struct lockdep_map dep_map;
  #endif
  } mem_hotplug = {
  	.active_writer = NULL,
  	.lock = __MUTEX_INITIALIZER(mem_hotplug.lock),
  	.refcount = 0,
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  	.dep_map = {.name = "mem_hotplug.lock" },
  #endif
  };
  
  /* Lockdep annotations for get/put_online_mems() and mem_hotplug_begin/end() */
  #define memhp_lock_acquire_read() lock_map_acquire_read(&mem_hotplug.dep_map)
  #define memhp_lock_acquire()      lock_map_acquire(&mem_hotplug.dep_map)
  #define memhp_lock_release()      lock_map_release(&mem_hotplug.dep_map)
  
  void get_online_mems(void)
  {
  	might_sleep();
  	if (mem_hotplug.active_writer == current)
  		return;
  	memhp_lock_acquire_read();
  	mutex_lock(&mem_hotplug.lock);
  	mem_hotplug.refcount++;
  	mutex_unlock(&mem_hotplug.lock);
  
  }

  void put_online_mems(void)
  {
  	if (mem_hotplug.active_writer == current)
  		return;
  	mutex_lock(&mem_hotplug.lock);
  
  	if (WARN_ON(!mem_hotplug.refcount))
  		mem_hotplug.refcount++; /* try to fix things up */
  
  	if (!--mem_hotplug.refcount && unlikely(mem_hotplug.active_writer))
  		wake_up_process(mem_hotplug.active_writer);
  	mutex_unlock(&mem_hotplug.lock);
  	memhp_lock_release();
  }
  void mem_hotplug_begin(void)
  {
  	mem_hotplug.active_writer = current;
  
  	memhp_lock_acquire();
  	for (;;) {
  		mutex_lock(&mem_hotplug.lock);
  		if (likely(!mem_hotplug.refcount))
  			break;
  		__set_current_state(TASK_UNINTERRUPTIBLE);
  		mutex_unlock(&mem_hotplug.lock);
  		schedule();
  	}
  }
  void mem_hotplug_done(void)
  {
  	mem_hotplug.active_writer = NULL;
  	mutex_unlock(&mem_hotplug.lock);
  	memhp_lock_release();
  }
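
/*
 * Usage sketch: readers that walk hotpluggable ranges bracket the walk
 * with get_online_mems()/put_online_mems(); hotplug operations wrap
 * themselves in mem_hotplug_begin()/mem_hotplug_done(), which waits for
 * the reader refcount above to drain before proceeding.
 */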

  /* add this memory to iomem resource */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res;
  	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
  	BUG_ON(!res);
  
  	res->name = "System RAM";
  	res->start = start;
  	res->end = start + size - 1;
  	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
  	if (request_resource(&iomem_resource, res) < 0) {
		pr_debug("System RAM resource %pR cannot be added\n", res);
  		kfree(res);
  		res = NULL;
  	}
  	return res;
  }
  
  static void release_memory_resource(struct resource *res)
  {
  	if (!res)
  		return;
  	release_resource(res);
  	kfree(res);
  	return;
  }
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  void get_page_bootmem(unsigned long info,  struct page *page,
  		      unsigned long type)
  {
  	page->lru.next = (struct list_head *) type;
  	SetPagePrivate(page);
  	set_page_private(page, info);
  	atomic_inc(&page->_count);
  }
  void put_page_bootmem(struct page *page)
  {
  	unsigned long type;

  	type = (unsigned long) page->lru.next;
  	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
  	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
  
  	if (atomic_dec_return(&page->_count) == 1) {
  		ClearPagePrivate(page);
  		set_page_private(page, 0);
  		INIT_LIST_HEAD(&page->lru);
  		free_reserved_page(page);
  	}
  }
  #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
  static void register_page_bootmem_info_section(unsigned long start_pfn)
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	/* Get section's memmap address */
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	/*
  	 * Get page for the memmap's phys address
  	 * XXX: need more consideration for sparse_vmemmap...
  	 */
  	page = virt_to_page(memmap);
  	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
  	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
  
  	/* remember memmap's page */
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, SECTION_INFO);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  
  }
  #else /* CONFIG_SPARSEMEM_VMEMMAP */
  static void register_page_bootmem_info_section(unsigned long start_pfn)
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
  
  	if (!pfn_valid(start_pfn))
  		return;
  
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
  
  void register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	unsigned long i, pfn, end_pfn, nr_pages;
  	int node = pgdat->node_id;
  	struct page *page;
  	struct zone *zone;
  
  	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	page = virt_to_page(pgdat);
  
  	for (i = 0; i < nr_pages; i++, page++)
  		get_page_bootmem(node, page, NODE_INFO);
  
  	zone = &pgdat->node_zones[0];
  	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
  		if (zone_is_initialized(zone)) {
  			nr_pages = zone->wait_table_hash_nr_entries
  				* sizeof(wait_queue_head_t);
  			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
  			page = virt_to_page(zone->wait_table);
  
  			for (i = 0; i < nr_pages; i++, page++)
  				get_page_bootmem(node, page, NODE_INFO);
  		}
  	}
  
  	pfn = pgdat->node_start_pfn;
  	end_pfn = pgdat_end_pfn(pgdat);

  	/* register section info */
  	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		/*
  		 * Some platforms can assign the same pfn to multiple nodes - on
  		 * node0 as well as nodeN.  To avoid registering a pfn against
		 * multiple nodes, we check that this pfn does not already
		 * reside in some other node.
  		 */
  		if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node))
  			register_page_bootmem_info_section(pfn);
  	}
  }
  #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */

  static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
  				     unsigned long end_pfn)
  {
  	unsigned long old_zone_end_pfn;
  
  	zone_span_writelock(zone);
  	old_zone_end_pfn = zone_end_pfn(zone);
  	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
  		zone->zone_start_pfn = start_pfn;
  
  	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
  				zone->zone_start_pfn;
  
  	zone_span_writeunlock(zone);
  }
  static void resize_zone(struct zone *zone, unsigned long start_pfn,
  		unsigned long end_pfn)
  {
  	zone_span_writelock(zone);
  	if (end_pfn - start_pfn) {
  		zone->zone_start_pfn = start_pfn;
  		zone->spanned_pages = end_pfn - start_pfn;
  	} else {
		/*
		 * keep it consistent with free_area_init_core():
		 * if spanned_pages == 0, then keep start_pfn == 0
		 */
  		zone->zone_start_pfn = 0;
  		zone->spanned_pages = 0;
  	}
  
  	zone_span_writeunlock(zone);
  }
  
  static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
  		unsigned long end_pfn)
  {
  	enum zone_type zid = zone_idx(zone);
  	int nid = zone->zone_pgdat->node_id;
  	unsigned long pfn;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn++)
  		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
  }
  /* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
   * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
  static int __ref ensure_zone_is_initialized(struct zone *zone,
  			unsigned long start_pfn, unsigned long num_pages)
  {
  	if (!zone_is_initialized(zone))
  		return init_currently_empty_zone(zone, start_pfn, num_pages,
  						 MEMMAP_HOTPLUG);
  	return 0;
  }
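
/*
 * Move pfns in [start_pfn, end_pfn) from @z2 into the adjacent lower zone
 * @z1; the range must cover the start of @z2. Used by online_pages() below
 * when onlining ZONE_MOVABLE memory as kernel memory.
 */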
  static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
  		unsigned long start_pfn, unsigned long end_pfn)
  {
  	int ret;
  	unsigned long flags;
  	unsigned long z1_start_pfn;
  	ret = ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
  	if (ret)
  		return ret;
  
  	pgdat_resize_lock(z1->zone_pgdat, &flags);
  
  	/* can't move pfns which are higher than @z2 */
  	if (end_pfn > zone_end_pfn(z2))
  		goto out_fail;
  	/* the move out part must be at the left most of @z2 */
  	if (start_pfn > z2->zone_start_pfn)
  		goto out_fail;
	/* must include/overlap */
  	if (end_pfn <= z2->zone_start_pfn)
  		goto out_fail;
  	/* use start_pfn for z1's start_pfn if z1 is empty */
  	if (!zone_is_empty(z1))
  		z1_start_pfn = z1->zone_start_pfn;
  	else
  		z1_start_pfn = start_pfn;
  
  	resize_zone(z1, z1_start_pfn, end_pfn);
  	resize_zone(z2, end_pfn, zone_end_pfn(z2));
  
  	pgdat_resize_unlock(z1->zone_pgdat, &flags);
  
  	fix_zone_id(z1, start_pfn, end_pfn);
  
  	return 0;
  out_fail:
  	pgdat_resize_unlock(z1->zone_pgdat, &flags);
  	return -1;
  }
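
/*
 * Mirror image of move_pfn_range_left(): move pfns in [start_pfn, end_pfn)
 * from @z1 into the adjacent higher zone @z2; the range must cover the end
 * of @z1. Used by online_pages() below when onlining memory as movable.
 */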
  static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
  		unsigned long start_pfn, unsigned long end_pfn)
  {
  	int ret;
  	unsigned long flags;
  	unsigned long z2_end_pfn;
  	ret = ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
  	if (ret)
  		return ret;
  
  	pgdat_resize_lock(z1->zone_pgdat, &flags);
  
  	/* can't move pfns which are lower than @z1 */
  	if (z1->zone_start_pfn > start_pfn)
  		goto out_fail;
	/* the move out part must be at the rightmost of @z1 */
	if (zone_end_pfn(z1) > end_pfn)
  		goto out_fail;
	/* must include/overlap */
  	if (start_pfn >= zone_end_pfn(z1))
  		goto out_fail;
  	/* use end_pfn for z2's end_pfn if z2 is empty */
  	if (!zone_is_empty(z2))
  		z2_end_pfn = zone_end_pfn(z2);
  	else
  		z2_end_pfn = end_pfn;
  	resize_zone(z1, z1->zone_start_pfn, start_pfn);
  	resize_zone(z2, start_pfn, z2_end_pfn);
  
  	pgdat_resize_unlock(z1->zone_pgdat, &flags);
  
  	fix_zone_id(z2, start_pfn, end_pfn);
  
  	return 0;
  out_fail:
  	pgdat_resize_unlock(z1->zone_pgdat, &flags);
  	return -1;
  }
  static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
  				      unsigned long end_pfn)
  {
  	unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);

  	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
  		pgdat->node_start_pfn = start_pfn;
  
  	pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
  					pgdat->node_start_pfn;
  }
  static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nr_pages = PAGES_PER_SECTION;
  	int nid = pgdat->node_id;
  	int zone_type;
  	unsigned long flags;
  	int ret;
  
  	zone_type = zone - pgdat->node_zones;
  	ret = ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
  	if (ret)
  		return ret;

  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
  	grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
  			phys_start_pfn + nr_pages);
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
  	memmap_init_zone(nr_pages, nid, zone_type,
  			 phys_start_pfn, MEMMAP_HOTPLUG);
  	return 0;
  }
  static int __meminit __add_section(int nid, struct zone *zone,
  					unsigned long phys_start_pfn)
  {
  	int ret;
  	if (pfn_valid(phys_start_pfn))
  		return -EEXIST;
  	ret = sparse_add_one_section(zone, phys_start_pfn);
  
  	if (ret < 0)
  		return ret;
  	ret = __add_zone(zone, phys_start_pfn);
  
  	if (ret < 0)
  		return ret;
  	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
  }
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
  int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
  			unsigned long nr_pages)
  {
  	unsigned long i;
  	int err = 0;
  	int start_sec, end_sec;
  	/* during initialize mem_map, align hot-added range to section */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
  
  	for (i = start_sec; i <= end_sec; i++) {
  		err = __add_section(nid, zone, section_nr_to_pfn(i));
  
		/*
		 * EEXIST is finally dealt with by the ioresource collision
		 * check; see add_memory() => register_memory_resource().
		 * A warning is printed if there is a collision.
		 */
  		if (err && (err != -EEXIST))
  			break;
  		err = 0;
  	}
  
  	return err;
  }
  EXPORT_SYMBOL_GPL(__add_pages);
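
/*
 * For example (sketch), an arch_add_memory() implementation typically
 * picks the target zone and then calls:
 *
 *	__add_pages(nid, zone, start >> PAGE_SHIFT, size >> PAGE_SHIFT);
 */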
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
  static int find_smallest_section_pfn(int nid, struct zone *zone,
  				     unsigned long start_pfn,
  				     unsigned long end_pfn)
  {
  	struct mem_section *ms;
  
  	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(start_pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(start_pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(start_pfn)))
  			continue;
  
  		return start_pfn;
  	}
  
  	return 0;
  }
  
  /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
  static int find_biggest_section_pfn(int nid, struct zone *zone,
  				    unsigned long start_pfn,
  				    unsigned long end_pfn)
  {
  	struct mem_section *ms;
  	unsigned long pfn;
  
  	/* pfn is the end pfn of a memory section. */
  	pfn = end_pfn - 1;
  	for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(pfn)))
  			continue;
  
  		return pfn;
  	}
  
  	return 0;
  }
  
  static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
  			     unsigned long end_pfn)
  {
  	unsigned long zone_start_pfn = zone->zone_start_pfn;
  	unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
  	unsigned long zone_end_pfn = z;
  	unsigned long pfn;
  	struct mem_section *ms;
  	int nid = zone_to_nid(zone);
  
  	zone_span_writelock(zone);
  	if (zone_start_pfn == start_pfn) {
		/*
		 * If the section is the smallest section in the zone, we need
		 * to shrink zone->zone_start_pfn and zone->spanned_pages.
		 * In this case, we find the second smallest valid mem_section
		 * and shrink the zone to it.
		 */
  		pfn = find_smallest_section_pfn(nid, zone, end_pfn,
  						zone_end_pfn);
  		if (pfn) {
  			zone->zone_start_pfn = pfn;
  			zone->spanned_pages = zone_end_pfn - pfn;
  		}
  	} else if (zone_end_pfn == end_pfn) {
		/*
		 * If the section is the biggest section in the zone, we need
		 * to shrink zone->spanned_pages.
		 * In this case, we find the second biggest valid mem_section
		 * and shrink the zone to it.
		 */
  		pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
  					       start_pfn);
  		if (pfn)
  			zone->spanned_pages = pfn - zone_start_pfn + 1;
  	}
  
	/*
	 * If the section is neither the biggest nor the smallest mem_section
	 * in the zone, it only creates a hole in the zone, so we need not
	 * change the zone. But the zone may now contain nothing but holes,
	 * so check whether any valid section remains.
	 */
  	pfn = zone_start_pfn;
  	for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (page_zone(pfn_to_page(pfn)) != zone)
  			continue;
  
		/* If this is the section being removed, keep looking */
		if (start_pfn == pfn)
			continue;

		/* We found another valid section; nothing to do */
  		zone_span_writeunlock(zone);
  		return;
  	}
  
  	/* The zone has no valid section */
  	zone->zone_start_pfn = 0;
  	zone->spanned_pages = 0;
  	zone_span_writeunlock(zone);
  }
  
  static void shrink_pgdat_span(struct pglist_data *pgdat,
  			      unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
  	unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
  	unsigned long pgdat_end_pfn = p;
  	unsigned long pfn;
  	struct mem_section *ms;
  	int nid = pgdat->node_id;
  
  	if (pgdat_start_pfn == start_pfn) {
		/*
		 * If the section is the smallest section in the pgdat, we need
		 * to shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
		 * In this case, we find the second smallest valid mem_section
		 * and shrink the pgdat to it.
		 */
  		pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
  						pgdat_end_pfn);
  		if (pfn) {
  			pgdat->node_start_pfn = pfn;
  			pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
  		}
  	} else if (pgdat_end_pfn == end_pfn) {
		/*
		 * If the section is the biggest section in the pgdat, we need
		 * to shrink pgdat->node_spanned_pages.
		 * In this case, we find the second biggest valid mem_section
		 * and shrink the pgdat to it.
		 */
  		pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
  					       start_pfn);
  		if (pfn)
  			pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
  	}
  
	/*
	 * If the section is neither the biggest nor the smallest mem_section
	 * in the pgdat, it only creates a hole in the pgdat, so we need not
	 * change the pgdat.
	 * But the pgdat may now contain nothing but holes, so check whether
	 * any valid section remains.
	 */
  	pfn = pgdat_start_pfn;
  	for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (pfn_to_nid(pfn) != nid)
  			continue;
  
		/* If this is the section being removed, keep looking */
		if (start_pfn == pfn)
			continue;

		/* We found another valid section; nothing to do */
  		return;
  	}
  
  	/* The pgdat has no valid section */
  	pgdat->node_start_pfn = 0;
  	pgdat->node_spanned_pages = 0;
  }
  
  static void __remove_zone(struct zone *zone, unsigned long start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nr_pages = PAGES_PER_SECTION;
  	int zone_type;
  	unsigned long flags;
  
  	zone_type = zone - pgdat->node_zones;
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
  	shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
  }
  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
  	unsigned long start_pfn;
  	int scn_nr;
  	int ret = -EINVAL;
  
  	if (!valid_section(ms))
  		return ret;
  
  	ret = unregister_memory_section(ms);
  	if (ret)
  		return ret;
  	scn_nr = __section_nr(ms);
  	start_pfn = section_nr_to_pfn(scn_nr);
  	__remove_zone(zone, start_pfn);
  	sparse_remove_one_section(zone, ms);
  	return 0;
  }
  /**
   * __remove_pages() - remove sections of pages from a zone
   * @zone: zone from which pages need to be removed
   * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
   * @nr_pages: number of pages to remove (must be multiple of section size)
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
 * sure that pages are marked reserved and zones are adjusted properly by
   * calling offline_pages().
   */
  int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
  	unsigned long i;
  	int sections_to_remove;
  	resource_size_t start, size;
  	int ret = 0;
  
  	/*
  	 * We can only remove entire sections
  	 */
  	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
  	BUG_ON(nr_pages % PAGES_PER_SECTION);
  	start = phys_start_pfn << PAGE_SHIFT;
  	size = nr_pages * PAGE_SIZE;
  	ret = release_mem_region_adjustable(&iomem_resource, start, size);
  	if (ret) {
  		resource_size_t endres = start + size - 1;
  
		pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
  				&start, &endres, ret);
  	}

  	sections_to_remove = nr_pages / PAGES_PER_SECTION;
  	for (i = 0; i < sections_to_remove; i++) {
  		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
  		ret = __remove_section(zone, __pfn_to_section(pfn));
  		if (ret)
  			break;
  	}
  	return ret;
  }
  EXPORT_SYMBOL_GPL(__remove_pages);
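
/*
 * For example (sketch), an arch_remove_memory() implementation typically
 * looks up the zone for the range and then calls:
 *
 *	__remove_pages(zone, start >> PAGE_SHIFT, size >> PAGE_SHIFT);
 */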
  #endif /* CONFIG_MEMORY_HOTREMOVE */

  int set_online_page_callback(online_page_callback_t callback)
  {
  	int rc = -EINVAL;
  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);
  
  	if (online_page_callback == generic_online_page) {
  		online_page_callback = callback;
  		rc = 0;
  	}
  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();
  
  	return rc;
  }
  EXPORT_SYMBOL_GPL(set_online_page_callback);
  
  int restore_online_page_callback(online_page_callback_t callback)
  {
  	int rc = -EINVAL;
  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);
  
  	if (online_page_callback == callback) {
  		online_page_callback = generic_online_page;
  		rc = 0;
  	}
  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();
  
  	return rc;
  }
  EXPORT_SYMBOL_GPL(restore_online_page_callback);
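
/*
 * The __online_page_* helpers below make up generic_online_page(); they
 * are exported separately so that a custom online_page_callback (for
 * example, a ballooning driver) can reuse parts of the generic path.
 */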
  
  void __online_page_set_limits(struct page *page)
  {
  }
  EXPORT_SYMBOL_GPL(__online_page_set_limits);
  
  void __online_page_increment_counters(struct page *page)
  {
  	adjust_managed_page_count(page, 1);
  }
  EXPORT_SYMBOL_GPL(__online_page_increment_counters);

  void __online_page_free(struct page *page)
  {
  	__free_reserved_page(page);
  }
  EXPORT_SYMBOL_GPL(__online_page_free);
  
  static void generic_online_page(struct page *page)
  {
  	__online_page_set_limits(page);
  	__online_page_increment_counters(page);
  	__online_page_free(page);
  }

  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
  {
  	unsigned long i;
  	unsigned long onlined_pages = *(unsigned long *)arg;
  	struct page *page;
  	if (PageReserved(pfn_to_page(start_pfn)))
  		for (i = 0; i < nr_pages; i++) {
  			page = pfn_to_page(start_pfn + i);
  			(*online_page_callback)(page);
  			onlined_pages++;
  		}
  	*(unsigned long *)arg = onlined_pages;
  	return 0;
  }
  #ifdef CONFIG_MOVABLE_NODE
  /*
 * With CONFIG_MOVABLE_NODE, we permit onlining of a node which doesn't have
   * normal memory.
   */
  static bool can_online_high_movable(struct zone *zone)
  {
  	return true;
  }
  #else /* CONFIG_MOVABLE_NODE */
  /* ensure every online node has NORMAL memory */
  static bool can_online_high_movable(struct zone *zone)
  {
  	return node_state(zone_to_nid(zone), N_NORMAL_MEMORY);
  }
  #endif /* CONFIG_MOVABLE_NODE */

  /* check which state of node_states will be changed when online memory */
  static void node_states_check_changes_online(unsigned long nr_pages,
  	struct zone *zone, struct memory_notify *arg)
  {
  	int nid = zone_to_nid(zone);
  	enum zone_type zone_last = ZONE_NORMAL;
  
  	/*
  	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_NORMAL,
  	 * set zone_last to ZONE_NORMAL.
  	 *
  	 * If we don't have HIGHMEM nor movable node,
  	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
  	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
  	 */
  	if (N_MEMORY == N_NORMAL_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	/*
	 * if the memory to be onlined is in a zone of 0...zone_last, and
	 * the zones of 0...zone_last don't have memory before onlining, we will
  	 * need to set the node to node_states[N_NORMAL_MEMORY] after
  	 * the memory is online.
  	 */
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_NORMAL_MEMORY))
  		arg->status_change_nid_normal = nid;
  	else
  		arg->status_change_nid_normal = -1;
  #ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE,
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_HIGH_MEMORY))
  		arg->status_change_nid_high = nid;
  	else
  		arg->status_change_nid_high = -1;
  #else
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif
  	/*
	 * if the node doesn't have memory before onlining, we will need to
  	 * set the node to node_states[N_MEMORY] after the memory
  	 * is online.
  	 */
  	if (!node_state(nid, N_MEMORY))
  		arg->status_change_nid = nid;
  	else
  		arg->status_change_nid = -1;
  }
  
  static void node_states_set_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_set_state(node, N_NORMAL_MEMORY);
  	if (arg->status_change_nid_high >= 0)
  		node_set_state(node, N_HIGH_MEMORY);
  
  	node_set_state(node, N_MEMORY);
  }

  /* Must be protected by mem_hotplug_begin() */
  int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
  {
  	unsigned long flags;
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
  	int need_zonelists_rebuild = 0;
  	int nid;
  	int ret;
  	struct memory_notify arg;
  	/*
  	 * This doesn't need a lock to do pfn_to_page().
  	 * The section can't be removed here because of the
  	 * memory_block->state_mutex.
  	 */
  	zone = page_zone(pfn_to_page(pfn));
  	if ((zone_idx(zone) > ZONE_NORMAL ||
  	    online_type == MMOP_ONLINE_MOVABLE) &&
  	    !can_online_high_movable(zone))
  		return -EINVAL;

  	if (online_type == MMOP_ONLINE_KERNEL &&
  	    zone_idx(zone) == ZONE_MOVABLE) {
  		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
  			return -EINVAL;
  	}
  	if (online_type == MMOP_ONLINE_MOVABLE &&
  	    zone_idx(zone) == ZONE_MOVABLE - 1) {
  		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
  			return -EINVAL;
  	}
  
	/* The previous code may have changed the zone of the pfn range */
  	zone = page_zone(pfn_to_page(pfn));
  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
  	node_states_check_changes_online(nr_pages, zone, &arg);

  	nid = pfn_to_nid(pfn);

  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret) {
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
  	/*
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
  	mutex_lock(&zonelists_mutex);
  	if (!populated_zone(zone)) {
  		need_zonelists_rebuild = 1;
  		build_all_zonelists(NULL, zone);
  	}

  	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
  		online_pages_range);
  	if (ret) {
  		if (need_zonelists_rebuild)
  			zone_pcp_reset(zone);
  		mutex_unlock(&zonelists_mutex);
		printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n",
  		       (unsigned long long) pfn << PAGE_SHIFT,
  		       (((unsigned long long) pfn + nr_pages)
  			    << PAGE_SHIFT) - 1);
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
  		return ret;
  	}
  	zone->present_pages += onlined_pages;
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	zone->zone_pgdat->node_present_pages += onlined_pages;
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
  	if (onlined_pages) {
  		node_states_set_node(zone_to_nid(zone), &arg);
  		if (need_zonelists_rebuild)
  			build_all_zonelists(NULL, NULL);
  		else
  			zone_pcp_update(zone);
  	}

  	mutex_unlock(&zonelists_mutex);
  
  	init_per_zone_wmark_min();
  	if (onlined_pages)
  		kswapd_run(zone_to_nid(zone));

  	vm_total_pages = nr_free_pagecache_pages();

  	writeback_set_ratelimit();
  
  	if (onlined_pages)
  		memory_notify(MEM_ONLINE, &arg);
  	return 0;
  }
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */

  static void reset_node_present_pages(pg_data_t *pgdat)
  {
  	struct zone *z;
  
  	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
  		z->present_pages = 0;
  
  	pgdat->node_present_pages = 0;
  }
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
  {
  	struct pglist_data *pgdat;
  	unsigned long zones_size[MAX_NR_ZONES] = {0};
  	unsigned long zholes_size[MAX_NR_ZONES] = {0};
  	unsigned long start_pfn = PFN_DOWN(start);

  	pgdat = NODE_DATA(nid);
  	if (!pgdat) {
  		pgdat = arch_alloc_nodedata(nid);
  		if (!pgdat)
  			return NULL;

  		arch_refresh_nodedata(nid, pgdat);
  	} else {
  		/* Reset the nr_zones and classzone_idx to 0 before reuse */
  		pgdat->nr_zones = 0;
  		pgdat->classzone_idx = 0;
  	}
  
  	/* we can use NODE_DATA(nid) from here */
  
  	/* init node's zones as empty zones, we don't have any present pages.*/
  	free_area_init_node(nid, zones_size, start_pfn, zholes_size);

	/*
	 * The node we allocated has no zone fallback lists. Build them here
	 * to avoid accessing an uninitialized zonelist.
	 */
  	mutex_lock(&zonelists_mutex);
  	build_all_zonelists(pgdat, NULL);
  	mutex_unlock(&zonelists_mutex);

  	/*
  	 * zone->managed_pages is set to an approximate value in
	 * free_area_init_core(), which would cause
	 * /sys/device/system/node/nodeX/meminfo to report wrong data.
  	 * So reset it to 0 before any memory is onlined.
  	 */
  	reset_node_managed_pages(pgdat);
  	/*
  	 * When memory is hot-added, all the memory is in offline state. So
  	 * clear all zones' present_pages because they will be updated in
  	 * online_pages() and offline_pages().
  	 */
  	reset_node_present_pages(pgdat);
  	return pgdat;
  }
  
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
  	arch_free_nodedata(pgdat);
  	return;
  }

  /**
   * try_online_node - online a node if offlined
   *
   * called by cpu_up() to online a node without onlined memory.
   */
  int try_online_node(int nid)
  {
  	pg_data_t	*pgdat;
  	int	ret;
  	if (node_online(nid))
  		return 0;
  	mem_hotplug_begin();
  	pgdat = hotadd_new_pgdat(nid, 0);
  	if (!pgdat) {
  		pr_err("Cannot online node %d due to NULL pgdat
  ", nid);
  		ret = -ENOMEM;
  		goto out;
  	}
  	node_set_online(nid);
  	ret = register_one_node(nid);
  	BUG_ON(ret);
  	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
  		mutex_lock(&zonelists_mutex);
  		build_all_zonelists(NULL, NULL);
  		mutex_unlock(&zonelists_mutex);
  	}
  out:
  	mem_hotplug_done();
  	return ret;
  }
  static int check_hotplug_memory_range(u64 start, u64 size)
  {
  	u64 start_pfn = PFN_DOWN(start);
  	u64 nr_pages = size >> PAGE_SHIFT;
  
  	/* Memory range must be aligned with section */
  	if ((start_pfn & ~PAGE_SECTION_MASK) ||
  	    (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
  		pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx
  ",
  				(unsigned long long)start,
  				(unsigned long long)size);
  		return -EINVAL;
  	}
  
  	return 0;
  }
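
/*
 * Worked example (sketch): with 128MB sections (SECTION_SIZE_BITS == 27,
 * as on x86_64), start = 0x100000000 and size = 0x8000000 pass this check,
 * while size = 0x4000000 (64MB) is rejected as section-unaligned.
 */
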
/*
 * If the movable zone has already been set up, newly added memory should be
 * checked. If its address is higher than the movable zone, it should be added
 * as movable. Without this check, the movable zone may overlap with another
 * zone.
 */
  static int should_add_memory_movable(int nid, u64 start, u64 size)
  {
  	unsigned long start_pfn = start >> PAGE_SHIFT;
  	pg_data_t *pgdat = NODE_DATA(nid);
  	struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
  
  	if (zone_is_empty(movable_zone))
  		return 0;
  
  	if (movable_zone->zone_start_pfn <= start_pfn)
  		return 1;
  
  	return 0;
  }
  
  int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
  {
  	if (should_add_memory_movable(nid, start, size))
  		return ZONE_MOVABLE;
  
  	return zone_default;
  }
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  int __ref add_memory(int nid, u64 start, u64 size)
  {
  	pg_data_t *pgdat = NULL;
  	bool new_pgdat;
  	bool new_node;
  	struct resource *res;
  	int ret;
  	ret = check_hotplug_memory_range(start, size);
  	if (ret)
  		return ret;
  	res = register_memory_resource(start, size);
  	ret = -EEXIST;
  	if (!res)
  		return ret;

  	{	/* Stupid hack to suppress address-never-null warning */
  		void *p = NODE_DATA(nid);
  		new_pgdat = !p;
  	}

  	mem_hotplug_begin();

  	new_node = !node_online(nid);
  	if (new_node) {
  		pgdat = hotadd_new_pgdat(nid, start);
  		ret = -ENOMEM;
  		if (!pgdat)
  			goto error;
  	}
  	/* call arch's memory hotadd */
  	ret = arch_add_memory(nid, start, size);
  	if (ret < 0)
  		goto error;
  	/* we online node here. we can't roll back from here. */
  	node_set_online(nid);
  	if (new_node) {
  		ret = register_one_node(nid);
  		/*
  		 * If sysfs file of new node can't create, cpu on the node
  		 * can't be hot-added. There is no rollback way now.
  		 * So, check by BUG_ON() to catch it reluctantly..
  		 */
  		BUG_ON(ret);
  	}
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
1207
1208
  	/* create new memmap entry */
  	firmware_map_add_hotplug(start, start + size, "System RAM");
6ad696d2c   Andi Kleen   mm: allow memory ...
1209
  	goto out;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1210
1211
1212
1213
  error:
  	/* rollback pgdat allocation and others */
  	if (new_pgdat)
  		rollback_node_hotadd(nid, pgdat);
a864b9d06   Sasha Levin   mm: memory_hotplu...
1214
  	release_memory_resource(res);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1215

6ad696d2c   Andi Kleen   mm: allow memory ...
1216
  out:
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1217
  	mem_hotplug_done();
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1218
1219
1220
  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
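
/*
 * Illustrative sketch (hypothetical caller, not upstream code): a hot-add
 * driver such as the ACPI memory device driver is expected to call
 * add_memory() with the node and physical range it discovered. Note that
 * add_memory() only registers the range; the new memory still has to be
 * onlined separately (e.g. via the memory block's sysfs "state" file).
 * Taking lock_device_hotplug() here mirrors what remove_memory() below
 * documents and is an assumption of this sketch.
 */
static int __maybe_unused example_hot_add(int nid, u64 start, u64 size)
{
	int ret;

	lock_device_hotplug();
	ret = add_memory(nid, start, size);
	unlock_device_hotplug();

	return ret;
}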

#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
 * set and the size of the free page is given by page_order(). Using this,
 * the function determines if the pageblock contains only free pages.
 * Due to buddy constraints, a free page at least the size of a pageblock
 * will be located at the start of the pageblock.
 */
static inline int pageblock_free(struct page *page)
{
	return PageBuddy(page) && page_order(page) >= pageblock_order;
}
  
/* Return the start of the next active pageblock after a given page */
static struct page *next_active_pageblock(struct page *page)
{
	/* Ensure the starting page is pageblock-aligned */
	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));

	/* If the entire pageblock is free, move to the end of the free page */
	if (pageblock_free(page)) {
		int order;
		/* be careful: we don't hold any locks, page_order can change */
		order = page_order(page);
		if ((order < MAX_ORDER) && (order >= pageblock_order))
			return page + (1 << order);
	}

	return page + pageblock_nr_pages;
}
  
/* Checks if this range of memory is likely to be hot-removable. */
int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
	struct page *page = pfn_to_page(start_pfn);
	struct page *end_page = page + nr_pages;

	/* Check the starting page of each pageblock within the range */
	for (; page < end_page; page = next_active_pageblock(page)) {
		if (!is_pageblock_removable_nolock(page))
			return 0;
		cond_resched();
	}

	/* All pageblocks in the memory block are likely to be hot-removable */
	return 1;
}
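
/*
 * Illustrative sketch (modelled on the memory block "removable" sysfs
 * attribute; the function name is hypothetical): a memory block is
 * reported removable only if every section it spans passes
 * is_mem_section_removable().
 */
static int __maybe_unused example_block_removable(unsigned long start_pfn,
						  unsigned long nr_sections)
{
	unsigned long i;
	int ret = 1;

	for (i = 0; i < nr_sections; i++)
		ret &= is_mem_section_removable(start_pfn + i * PAGES_PER_SECTION,
						PAGES_PER_SECTION);
	return ret;
}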
  
/*
 * Confirm that all pages in a range [start, end) belong to the same zone.
 */
int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct zone *zone = NULL;
	struct page *page;
	int i;
	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += MAX_ORDER_NR_PAGES) {
		i = 0;
		/* This is just a CONFIG_HOLES_IN_ZONE check. */
		while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
			i++;
		if (i == MAX_ORDER_NR_PAGES)
			continue;
		page = pfn_to_page(pfn + i);
		if (zone && page_zone(page) != zone)
			return 0;
		zone = page_zone(page);
	}
	return 1;
}
  
/*
 * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
 * and hugepages). We scan the pfn range because it's much easier than
 * scanning over a linked list. This function returns the pfn of the first
 * found movable page if one is found, otherwise 0.
 */
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
	unsigned long pfn;
	struct page *page;
	for (pfn = start; pfn < end; pfn++) {
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageLRU(page))
				return pfn;
			if (PageHuge(page)) {
				if (page_huge_active(page))
					return pfn;
				else
					pfn = round_up(pfn + 1,
						1 << compound_order(page)) - 1;
			}
		}
	}
	return 0;
}

#define NR_OFFLINE_AT_ONCE_PAGES	(256)
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct page *page;
	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
	int not_managed = 0;
	int ret = 0;
	LIST_HEAD(source);

	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);

		if (PageHuge(page)) {
			struct page *head = compound_head(page);
			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
			if (compound_order(head) > PFN_SECTION_SHIFT) {
				ret = -EBUSY;
				break;
			}
			if (isolate_huge_page(page, &source))
				move_pages -= 1 << compound_order(head);
			continue;
		}

		if (!get_page_unless_zero(page))
			continue;
		/*
		 * We can skip free pages. And we can only deal with pages on
		 * LRU.
		 */
		ret = isolate_lru_page(page);
		if (!ret) { /* Success */
			put_page(page);
			list_add_tail(&page->lru, &source);
			move_pages--;
			inc_zone_page_state(page, NR_ISOLATED_ANON +
					    page_is_file_cache(page));
		} else {
#ifdef CONFIG_DEBUG_VM
			printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
			       pfn);
			dump_page(page, "failed to remove from LRU");
#endif
			put_page(page);
			/*
			 * Because we don't hold the big zone->lock, we must
			 * check the page count again here.
			 */
			if (page_count(page)) {
				not_managed++;
				ret = -EBUSY;
				break;
			}
		}
	}
	if (!list_empty(&source)) {
		if (not_managed) {
			putback_movable_pages(&source);
			goto out;
		}

		/*
		 * alloc_migrate_target() should be improved.
		 * migrate_pages() returns the number of failed pages.
		 */
		ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
		if (ret)
			putback_movable_pages(&source);
	}
out:
	return ret;
}

/*
 * remove from free_area[] and mark all as Reserved.
 */
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
			void *data)
{
	__offline_isolated_pages(start, start + nr_pages);
	return 0;
}

static void
offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
	walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL,
				offline_isolated_pages_cb);
}

/*
 * Check that all pages in the range, recorded as a memory resource, are
 * isolated.
 */
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
			void *data)
{
	int ret;
	long offlined = *(long *)data;
	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
	offlined = nr_pages;
	if (!ret)
		*(long *)data += offlined;
	return ret;
}

static long
check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
{
	long offlined = 0;
	int ret;
	ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined,
			check_pages_isolated_cb);
	if (ret < 0)
		offlined = (long)ret;
	return offlined;
}

#ifdef CONFIG_MOVABLE_NODE
/*
 * When CONFIG_MOVABLE_NODE, we permit offlining of a node which doesn't have
 * normal memory.
 */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{
	return true;
}
#else /* CONFIG_MOVABLE_NODE */
/* ensure the node has NORMAL memory if it is still online */
static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	unsigned long present_pages = 0;
	enum zone_type zt;

	for (zt = 0; zt <= ZONE_NORMAL; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;

	if (present_pages > nr_pages)
		return true;

	present_pages = 0;
	for (; zt <= ZONE_MOVABLE; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;

	/*
	 * we can't offline the last normal memory until all
	 * higher memory is offlined.
	 */
	return present_pages == 0;
}
#endif /* CONFIG_MOVABLE_NODE */

static int __init cmdline_parse_movable_node(char *p)
{
#ifdef CONFIG_MOVABLE_NODE
	/*
	 * Memory used by the kernel cannot be hot-removed because Linux
	 * cannot migrate the kernel pages. When memory hotplug is
	 * enabled, we should prevent memblock from allocating memory
	 * for the kernel.
	 *
	 * ACPI SRAT records all hotpluggable memory ranges. But before
	 * SRAT is parsed, we don't know about it.
	 *
	 * The kernel image is loaded into memory at a very early time. We
	 * cannot prevent this anyway. So on a NUMA system, we set any
	 * node the kernel resides on as un-hotpluggable.
	 *
	 * Since on modern servers one node can hold double-digit
	 * gigabytes of memory, we can assume the memory around the kernel
	 * image is also un-hotpluggable. So before SRAT is parsed, just
	 * allocate memory near the kernel image to try our best to keep
	 * the kernel away from hotpluggable memory.
	 */
	memblock_set_bottom_up(true);
	movable_node_enabled = true;
#else
	pr_warn("movable_node option not supported\n");
#endif
	return 0;
}
early_param("movable_node", cmdline_parse_movable_node);
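
/*
 * Usage note: booting with "movable_node" on the kernel command line
 * activates the bottom-up allocation policy above; on kernels built
 * without CONFIG_MOVABLE_NODE the option only triggers the warning.
 */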

/* check which state of node_states will be changed when offline memory */
static void node_states_check_changes_offline(unsigned long nr_pages,
		struct zone *zone, struct memory_notify *arg)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	unsigned long present_pages = 0;
	enum zone_type zt, zone_last = ZONE_NORMAL;

	/*
	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
	 * contains nodes which have zones of 0...ZONE_NORMAL,
	 * set zone_last to ZONE_NORMAL.
	 *
	 * If we don't have HIGHMEM nor movable node,
	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
	 */
	if (N_MEMORY == N_NORMAL_MEMORY)
		zone_last = ZONE_MOVABLE;

	/*
	 * check whether node_states[N_NORMAL_MEMORY] will be changed.
	 * If the memory to be offlined is in a zone of 0...zone_last,
	 * and it is the last present memory, 0...zone_last will
	 * become empty after the offline, thus we can determine that we
	 * will need to clear the node from node_states[N_NORMAL_MEMORY].
	 */
	for (zt = 0; zt <= zone_last; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;
	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
		arg->status_change_nid_normal = zone_to_nid(zone);
	else
		arg->status_change_nid_normal = -1;

#ifdef CONFIG_HIGHMEM
	/*
	 * If we have movable node, node_states[N_HIGH_MEMORY]
	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
	 * set zone_last to ZONE_HIGHMEM.
	 *
	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
	 * contains nodes which have zones of 0...ZONE_MOVABLE,
	 * set zone_last to ZONE_MOVABLE.
	 */
	zone_last = ZONE_HIGHMEM;
	if (N_MEMORY == N_HIGH_MEMORY)
		zone_last = ZONE_MOVABLE;

	for (; zt <= zone_last; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;
	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
		arg->status_change_nid_high = zone_to_nid(zone);
	else
		arg->status_change_nid_high = -1;
#else
	arg->status_change_nid_high = arg->status_change_nid_normal;
#endif

	/*
	 * node_states[N_HIGH_MEMORY] contains nodes which have 0...ZONE_MOVABLE
	 */
	zone_last = ZONE_MOVABLE;

	/*
	 * check whether node_states[N_HIGH_MEMORY] will be changed.
	 * If we try to offline the last present @nr_pages from the node,
	 * we can determine that we will need to clear the node from
	 * node_states[N_HIGH_MEMORY].
	 */
	for (; zt <= zone_last; zt++)
		present_pages += pgdat->node_zones[zt].present_pages;
	if (nr_pages >= present_pages)
		arg->status_change_nid = zone_to_nid(zone);
	else
		arg->status_change_nid = -1;
}
  
static void node_states_clear_node(int node, struct memory_notify *arg)
{
	if (arg->status_change_nid_normal >= 0)
		node_clear_state(node, N_NORMAL_MEMORY);

	if ((N_MEMORY != N_NORMAL_MEMORY) &&
	    (arg->status_change_nid_high >= 0))
		node_clear_state(node, N_HIGH_MEMORY);

	if ((N_MEMORY != N_HIGH_MEMORY) &&
	    (arg->status_change_nid >= 0))
		node_clear_state(node, N_MEMORY);
}
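
/*
 * Worked example (illustrative): on a kernel without CONFIG_HIGHMEM and
 * without movable node, N_MEMORY, N_HIGH_MEMORY and N_NORMAL_MEMORY alias
 * the same node state. Offlining the last present pages of a node then
 * sets all three status_change_nid* fields to that node, and in
 * node_states_clear_node() only the first node_clear_state() actually
 * runs - which is sufficient, because the three states are one and the
 * same in that configuration.
 */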

static int __ref __offline_pages(unsigned long start_pfn,
		  unsigned long end_pfn, unsigned long timeout)
{
	unsigned long pfn, nr_pages, expire;
	long offlined_pages;
	int ret, drain, retry_max, node;
	unsigned long flags;
	struct zone *zone;
	struct memory_notify arg;

	/* at least, alignment against pageblock is necessary */
	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
		return -EINVAL;
	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
		return -EINVAL;
	/*
	 * This makes hotplug much easier (and readable); we assume for now
	 * that the whole range lies in a single zone.
	 */
	if (!test_pages_in_a_zone(start_pfn, end_pfn))
		return -EINVAL;

	zone = page_zone(pfn_to_page(start_pfn));
	node = zone_to_nid(zone);
	nr_pages = end_pfn - start_pfn;

	if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages))
		return -EINVAL;

	/* set above range as isolated */
	ret = start_isolate_page_range(start_pfn, end_pfn,
				       MIGRATE_MOVABLE, true);
	if (ret)
		return ret;

	arg.start_pfn = start_pfn;
	arg.nr_pages = nr_pages;
	node_states_check_changes_offline(nr_pages, zone, &arg);

	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
	ret = notifier_to_errno(ret);
	if (ret)
		goto failed_removal;

	pfn = start_pfn;
	expire = jiffies + timeout;
	drain = 0;
	retry_max = 5;
repeat:
	/* start memory hot removal */
	ret = -EAGAIN;
	if (time_after(jiffies, expire))
		goto failed_removal;
	ret = -EINTR;
	if (signal_pending(current))
		goto failed_removal;
	ret = 0;
	if (drain) {
		lru_add_drain_all();
		cond_resched();
		drain_all_pages(zone);
	}

	pfn = scan_movable_pages(start_pfn, end_pfn);
	if (pfn) { /* We have movable pages */
		ret = do_migrate_range(pfn, end_pfn);
		if (!ret) {
			drain = 1;
			goto repeat;
		} else {
			if (ret < 0)
				if (--retry_max == 0)
					goto failed_removal;
			yield();
			drain = 1;
			goto repeat;
		}
	}
	/* drain all zone's lru pagevec, this is asynchronous... */
	lru_add_drain_all();
	yield();
	/* drain pcp pages, this is synchronous. */
	drain_all_pages(zone);
	/*
	 * dissolve free hugepages in the memory block before actually
	 * offlining, in order to keep hugetlbfs's object counting consistent.
	 */
	dissolve_free_huge_pages(start_pfn, end_pfn);
	/* check again */
	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
	if (offlined_pages < 0) {
		ret = -EBUSY;
		goto failed_removal;
	}
	printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
	/*
	 * Ok, all of our target is isolated.
	 * We cannot do a rollback at this point.
	 */
	offline_isolated_pages(start_pfn, end_pfn);
	/* reset pagetype flags and make the migrate type MOVABLE again */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
	/* removal success */
	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
	zone->present_pages -= offlined_pages;

	pgdat_resize_lock(zone->zone_pgdat, &flags);
	zone->zone_pgdat->node_present_pages -= offlined_pages;
	pgdat_resize_unlock(zone->zone_pgdat, &flags);

	init_per_zone_wmark_min();

	if (!populated_zone(zone)) {
		zone_pcp_reset(zone);
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL, NULL);
		mutex_unlock(&zonelists_mutex);
	} else
		zone_pcp_update(zone);

	node_states_clear_node(node, &arg);
	if (arg.status_change_nid >= 0)
		kswapd_stop(node);

	vm_total_pages = nr_free_pagecache_pages();
	writeback_set_ratelimit();

	memory_notify(MEM_OFFLINE, &arg);
	return 0;

failed_removal:
	printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
	       (unsigned long long) start_pfn << PAGE_SHIFT,
	       ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
	memory_notify(MEM_CANCEL_OFFLINE, &arg);
	/* pushback to free area */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
	return ret;
}

/* Must be protected by mem_hotplug_begin() */
int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
{
	return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
}
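
/*
 * Illustrative sketch (hypothetical caller): offlining a pfn range while
 * honouring the locking rule stated above. Real callers sit in the memory
 * block device code; the function name and explicit locking are
 * assumptions of this example.
 */
static int __maybe_unused example_offline_range(unsigned long start_pfn,
						unsigned long nr_pages)
{
	int ret;

	mem_hotplug_begin();
	ret = offline_pages(start_pfn, nr_pages);
	mem_hotplug_done();

	return ret;
}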
  #endif /* CONFIG_MEMORY_HOTREMOVE */

/**
 * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn)
 * @start_pfn: start pfn of the memory range
 * @end_pfn: end pfn of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory section walked
 *
 * This function walks through all present mem sections in the range
 * [start_pfn, end_pfn) and calls func on each mem section.
 *
 * Returns the return value of func.
 */
int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
		void *arg, int (*func)(struct memory_block *, void *))
{
	struct memory_block *mem = NULL;
	struct mem_section *section;
	unsigned long pfn, section_nr;
	int ret;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		section_nr = pfn_to_section_nr(pfn);
		if (!present_section_nr(section_nr))
			continue;

		section = __nr_to_section(section_nr);
		/* same memblock? */
		if (mem)
			if ((section_nr >= mem->start_section_nr) &&
			    (section_nr <= mem->end_section_nr))
				continue;

		mem = find_memory_block_hinted(section, mem);
		if (!mem)
			continue;

		ret = func(mem, arg);
		if (ret) {
			kobject_put(&mem->dev.kobj);
			return ret;
		}
	}

	if (mem)
		kobject_put(&mem->dev.kobj);

	return 0;
}
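
/*
 * Illustrative sketch (hypothetical callback): counting the memory blocks
 * in a range with walk_memory_range(). Returning 0 keeps the walk going;
 * a non-zero return stops it and is propagated to the caller, which is
 * what check_memblock_offlined_cb() below relies on.
 */
static int __maybe_unused example_count_blocks_cb(struct memory_block *mem,
						  void *arg)
{
	(*(int *)arg)++;
	return 0;
}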

#ifdef CONFIG_MEMORY_HOTREMOVE
static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
{
	int ret = !is_memblock_offlined(mem);

	if (unlikely(ret)) {
		phys_addr_t beginpa, endpa;

		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
		pr_warn("removing memory fails, because memory "
			"[%pa-%pa] is onlined\n",
			&beginpa, &endpa);
	}

	return ret;
}

static int check_cpu_on_node(pg_data_t *pgdat)
{
	int cpu;

	for_each_present_cpu(cpu) {
		if (cpu_to_node(cpu) == pgdat->node_id)
			/*
			 * the cpu on this node isn't removed, and we can't
			 * offline this node.
			 */
			return -EBUSY;
	}

	return 0;
}

static void unmap_cpu_on_node(pg_data_t *pgdat)
{
#ifdef CONFIG_ACPI_NUMA
	int cpu;

	for_each_possible_cpu(cpu)
		if (cpu_to_node(cpu) == pgdat->node_id)
			numa_clear_node(cpu);
#endif
}

static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
{
	int ret;

	ret = check_cpu_on_node(pgdat);
	if (ret)
		return ret;

	/*
	 * the node will be offlined when we come here, so we can clear
	 * the cpu_to_node() now.
	 */
	unmap_cpu_on_node(pgdat);
	return 0;
}

/**
 * try_offline_node
 *
 * Offline a node if all memory sections and cpus of the node are removed.
 *
 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
 * and online/offline operations before this call.
 */
void try_offline_node(int nid)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	unsigned long start_pfn = pgdat->node_start_pfn;
	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
	unsigned long pfn;
	int i;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);

		if (!present_section_nr(section_nr))
			continue;

		if (pfn_to_nid(pfn) != nid)
			continue;

		/*
		 * some memory sections of this node are not removed, and we
		 * can't offline the node now.
		 */
		return;
	}

	if (check_and_unmap_cpu_on_node(pgdat))
		return;

	/*
	 * all memory/cpus of this node have been removed, we can offline
	 * this node now.
	 */
	node_set_offline(nid);
	unregister_one_node(nid);

	/* free waittable in each zone */
	for (i = 0; i < MAX_NR_ZONES; i++) {
		struct zone *zone = pgdat->node_zones + i;

		/*
		 * wait_table may be allocated from boot memory,
		 * here only free it if it was allocated by vmalloc.
		 */
		if (is_vmalloc_addr(zone->wait_table)) {
			vfree(zone->wait_table);
			zone->wait_table = NULL;
		}
	}
}
  EXPORT_SYMBOL(try_offline_node);

/**
 * remove_memory
 *
 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
 * and online/offline operations before this call, as required by
 * try_offline_node().
 */
void __ref remove_memory(int nid, u64 start, u64 size)
{
	int ret;

	BUG_ON(check_hotplug_memory_range(start, size));

	mem_hotplug_begin();

	/*
	 * All memory blocks must be offlined before removing memory.  Check
	 * whether all memory blocks in question are offline and trigger a BUG()
	 * if this is not the case.
	 */
	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
				check_memblock_offlined_cb);
	if (ret)
		BUG();

	/* remove memmap entry */
	firmware_map_remove(start, start + size, "System RAM");

	arch_remove_memory(start, size);

	try_offline_node(nid);

	mem_hotplug_done();
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif /* CONFIG_MEMORY_HOTREMOVE */