  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/mm_inline.h>
#include <linux/firmware-map.h>
#include <linux/stop_machine.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>

#include <asm/tlbflush.h>

#include "internal.h"
  /*
 * online_page_callback contains a pointer to the current page onlining
 * function. Initially it is generic_online_page(). If required, it can be
 * changed by calling set_online_page_callback() to register a callback
 * and restore_online_page_callback() to restore the generic callback.
   */
  
  static void generic_online_page(struct page *page);
  
  static online_page_callback_t online_page_callback = generic_online_page;
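
/*
 * Example (a sketch, not taken verbatim from any driver): code that wants
 * to intercept page onlining -- a balloon driver, for instance -- registers
 * a callback with the online_page_callback_t signature and restores the
 * default when done. my_online_page() here is hypothetical:
 *
 *	rc = set_online_page_callback(&my_online_page);
 *	...
 *	rc = restore_online_page_callback(&my_online_page);
 */
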
DEFINE_MUTEX(mem_hotplug_mutex);

void lock_memory_hotplug(void)
{
	mutex_lock(&mem_hotplug_mutex);
}

void unlock_memory_hotplug(void)
{
	mutex_unlock(&mem_hotplug_mutex);
}
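
/*
 * Usage sketch: paths that mutate hotplug state bracket their work with
 * this mutex, e.g.
 *
 *	lock_memory_hotplug();
 *	... resize zones, rebuild zonelists, update node states ...
 *	unlock_memory_hotplug();
 */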

/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size)
{
	struct resource *res;
	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
	BUG_ON(!res);

	res->name = "System RAM";
	res->start = start;
	res->end = start + size - 1;
	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
	if (request_resource(&iomem_resource, res) < 0) {
		pr_debug("System RAM resource %pR cannot be added\n", res);
		kfree(res);
		res = NULL;
	}
	return res;
}

static void release_memory_resource(struct resource *res)
{
	if (!res)
		return;
	release_resource(res);
	kfree(res);
	return;
}

#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
void get_page_bootmem(unsigned long info, struct page *page,
		      unsigned long type)
{
	page->lru.next = (struct list_head *) type;
	SetPagePrivate(page);
	set_page_private(page, info);
	atomic_inc(&page->_count);
}
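
/*
 * Note: get_page_bootmem() pairs with put_page_bootmem() below. The type
 * is stashed in page->lru.next and the info in page_private(page); the
 * page is only handed back to the buddy allocator (via
 * free_reserved_page()) once the reference taken here is dropped.
 */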

void put_page_bootmem(struct page *page)
{
	unsigned long type;

	type = (unsigned long) page->lru.next;
	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);

	if (atomic_dec_return(&page->_count) == 1) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		INIT_LIST_HEAD(&page->lru);
		free_reserved_page(page);
	}
}

#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
#ifndef CONFIG_SPARSEMEM_VMEMMAP
static void register_page_bootmem_info_section(unsigned long start_pfn)
{
	unsigned long *usemap, mapsize, section_nr, i;
	struct mem_section *ms;
	struct page *page, *memmap;

	section_nr = pfn_to_section_nr(start_pfn);
	ms = __nr_to_section(section_nr);

	/* Get section's memmap address */
	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);

	/*
	 * Get page for the memmap's phys address
	 * XXX: need more consideration for sparse_vmemmap...
	 */
	page = virt_to_page(memmap);
	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;

	/* remember memmap's page */
	for (i = 0; i < mapsize; i++, page++)
		get_page_bootmem(section_nr, page, SECTION_INFO);

	usemap = __nr_to_section(section_nr)->pageblock_flags;
	page = virt_to_page(usemap);

	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;

	for (i = 0; i < mapsize; i++, page++)
		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);

}
  #else /* CONFIG_SPARSEMEM_VMEMMAP */
  static void register_page_bootmem_info_section(unsigned long start_pfn)
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
  
  	if (!pfn_valid(start_pfn))
  		return;
  
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
  
  void register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	unsigned long i, pfn, end_pfn, nr_pages;
  	int node = pgdat->node_id;
  	struct page *page;
  	struct zone *zone;
  
  	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	page = virt_to_page(pgdat);
  
  	for (i = 0; i < nr_pages; i++, page++)
  		get_page_bootmem(node, page, NODE_INFO);
  
  	zone = &pgdat->node_zones[0];
  	for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
		if (zone_is_initialized(zone)) {
			nr_pages = zone->wait_table_hash_nr_entries
				* sizeof(wait_queue_head_t);
			nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
			page = virt_to_page(zone->wait_table);

			for (i = 0; i < nr_pages; i++, page++)
				get_page_bootmem(node, page, NODE_INFO);
		}
	}

	pfn = pgdat->node_start_pfn;
	end_pfn = pgdat_end_pfn(pgdat);

	/* register section info */
	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		/*
		 * Some platforms can assign the same pfn to multiple nodes - on
		 * node0 as well as nodeN.  To avoid registering a pfn against
		 * multiple nodes we check that this pfn does not already
		 * reside in some other node.
		 */
		if (pfn_valid(pfn) && (pfn_to_nid(pfn) == node))
			register_page_bootmem_info_section(pfn);
	}
}
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */

  static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
  			   unsigned long end_pfn)
  {
  	unsigned long old_zone_end_pfn;
  
  	zone_span_writelock(zone);
	old_zone_end_pfn = zone_end_pfn(zone);
	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
  		zone->zone_start_pfn = start_pfn;
  
  	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
  				zone->zone_start_pfn;
  
  	zone_span_writeunlock(zone);
  }
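
/*
 * Worked example: growing a zone that spans pfns [0x10000, 0x20000) by the
 * range [0x8000, 0x9000) yields zone_start_pfn = 0x8000 and spanned_pages
 * = max(0x20000, 0x9000) - 0x8000 = 0x18000; the hole between the two
 * ranges stays inside the span.
 */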

static void resize_zone(struct zone *zone, unsigned long start_pfn,
		unsigned long end_pfn)
{
	zone_span_writelock(zone);
	if (end_pfn - start_pfn) {
		zone->zone_start_pfn = start_pfn;
		zone->spanned_pages = end_pfn - start_pfn;
	} else {
		/*
		 * make it consistent with free_area_init_core():
		 * if spanned_pages == 0, then keep start_pfn = 0
		 */
		zone->zone_start_pfn = 0;
		zone->spanned_pages = 0;
	}
  
  	zone_span_writeunlock(zone);
  }
  
  static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
  		unsigned long end_pfn)
  {
  	enum zone_type zid = zone_idx(zone);
  	int nid = zone->zone_pgdat->node_id;
  	unsigned long pfn;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn++)
  		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
  }

/* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
 * alloc_bootmem_node_nopanic()/memblock_virt_alloc_node_nopanic() */
  static int __ref ensure_zone_is_initialized(struct zone *zone,
  			unsigned long start_pfn, unsigned long num_pages)
  {
  	if (!zone_is_initialized(zone))
  		return init_currently_empty_zone(zone, start_pfn, num_pages,
  						 MEMMAP_HOTPLUG);
  	return 0;
  }

static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
		unsigned long start_pfn, unsigned long end_pfn)
{
	int ret;
	unsigned long flags;
	unsigned long z1_start_pfn;

	ret = ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
	if (ret)
		return ret;

	pgdat_resize_lock(z1->zone_pgdat, &flags);

	/* can't move pfns which are higher than @z2 */
	if (end_pfn > zone_end_pfn(z2))
		goto out_fail;
	/* the moved-out part must be at the leftmost of @z2 */
	if (start_pfn > z2->zone_start_pfn)
		goto out_fail;
	/* must include/overlap */
	if (end_pfn <= z2->zone_start_pfn)
		goto out_fail;

	/* use start_pfn for z1's start_pfn if z1 is empty */
	if (!zone_is_empty(z1))
		z1_start_pfn = z1->zone_start_pfn;
	else
		z1_start_pfn = start_pfn;

	resize_zone(z1, z1_start_pfn, end_pfn);
	resize_zone(z2, end_pfn, zone_end_pfn(z2));

	pgdat_resize_unlock(z1->zone_pgdat, &flags);

	fix_zone_id(z1, start_pfn, end_pfn);

	return 0;
out_fail:
	pgdat_resize_unlock(z1->zone_pgdat, &flags);
	return -1;
}
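
/*
 * move_pfn_range_left(z1, z2, ...) shifts [start_pfn, end_pfn) out of the
 * bottom of z2 into z1: z1 grows so it ends at end_pfn, z2 shrinks so it
 * starts at end_pfn, and fix_zone_id() rewrites the page links.
 * move_pfn_range_right() below is the mirror image.
 */
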
static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
		unsigned long start_pfn, unsigned long end_pfn)
{
	int ret;
	unsigned long flags;
	unsigned long z2_end_pfn;

	ret = ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
	if (ret)
		return ret;

	pgdat_resize_lock(z1->zone_pgdat, &flags);

	/* can't move pfns which are lower than @z1 */
	if (z1->zone_start_pfn > start_pfn)
		goto out_fail;
	/* the moved-out part must be at the rightmost of @z1 */
	if (zone_end_pfn(z1) > end_pfn)
		goto out_fail;
	/* must include/overlap */
	if (start_pfn >= zone_end_pfn(z1))
		goto out_fail;

	/* use end_pfn for z2's end_pfn if z2 is empty */
	if (!zone_is_empty(z2))
		z2_end_pfn = zone_end_pfn(z2);
	else
		z2_end_pfn = end_pfn;

	resize_zone(z1, z1->zone_start_pfn, start_pfn);
	resize_zone(z2, start_pfn, z2_end_pfn);

	pgdat_resize_unlock(z1->zone_pgdat, &flags);

	fix_zone_id(z2, start_pfn, end_pfn);

	return 0;
out_fail:
	pgdat_resize_unlock(z1->zone_pgdat, &flags);
	return -1;
}

static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
			    unsigned long end_pfn)
{
	unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);

	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
  		pgdat->node_start_pfn = start_pfn;
  
  	pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
  					pgdat->node_start_pfn;
  }

static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	int nr_pages = PAGES_PER_SECTION;
	int nid = pgdat->node_id;
	int zone_type;
	unsigned long flags;
	int ret;

	zone_type = zone - pgdat->node_zones;
	ret = ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
	if (ret)
		return ret;

	pgdat_resize_lock(zone->zone_pgdat, &flags);
	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
	grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
			phys_start_pfn + nr_pages);
	pgdat_resize_unlock(zone->zone_pgdat, &flags);
	memmap_init_zone(nr_pages, nid, zone_type,
			 phys_start_pfn, MEMMAP_HOTPLUG);
	return 0;
}

static int __meminit __add_section(int nid, struct zone *zone,
					unsigned long phys_start_pfn)
{
	int ret;

	if (pfn_valid(phys_start_pfn))
		return -EEXIST;

	ret = sparse_add_one_section(zone, phys_start_pfn);

	if (ret < 0)
		return ret;

	ret = __add_zone(zone, phys_start_pfn);

	if (ret < 0)
		return ret;

	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
}

/*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
  int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
  			unsigned long nr_pages)
  {
  	unsigned long i;
  	int err = 0;
  	int start_sec, end_sec;
	/* during mem_map initialization, align the hot-added range to sections */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
  
  	for (i = start_sec; i <= end_sec; i++) {
  		err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
  
  		/*
		 * -EEXIST is finally dealt with by the ioresource collision
		 * check; see add_memory() => register_memory_resource().
		 * A warning will be printed if there is a collision.
  		 */
  		if (err && (err != -EEXIST))
  			break;
  		err = 0;
  	}
  
  	return err;
  }
  EXPORT_SYMBOL_GPL(__add_pages);
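
/*
 * Caller sketch: an architecture's arch_add_memory() picks a zone and
 * forwards here. Roughly (modelled on x86_64; details vary per arch):
 *
 *	struct zone *zone = NODE_DATA(nid)->node_zones + ZONE_NORMAL;
 *	ret = __add_pages(nid, zone, start >> PAGE_SHIFT, size >> PAGE_SHIFT);
 */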
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
  				     unsigned long start_pfn,
  				     unsigned long end_pfn)
  {
  	struct mem_section *ms;
  
  	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(start_pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(start_pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(start_pfn)))
  			continue;
  
  		return start_pfn;
  	}
  
  	return 0;
  }
  
  /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
  				    unsigned long start_pfn,
  				    unsigned long end_pfn)
  {
  	struct mem_section *ms;
  	unsigned long pfn;
  
  	/* pfn is the end pfn of a memory section. */
  	pfn = end_pfn - 1;
  	for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(pfn)))
  			continue;
  
  		return pfn;
  	}
  
  	return 0;
  }
  
  static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
  			     unsigned long end_pfn)
  {
  	unsigned long zone_start_pfn = zone->zone_start_pfn;
  	unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
  	unsigned long zone_end_pfn = z;
  	unsigned long pfn;
  	struct mem_section *ms;
  	int nid = zone_to_nid(zone);
  
  	zone_span_writelock(zone);
  	if (zone_start_pfn == start_pfn) {
  		/*
		 * If the section is the smallest section in the zone, it needs
		 * to shrink zone->zone_start_pfn and zone->spanned_pages.
		 * In this case, we find the second smallest valid mem_section
		 * for shrinking the zone.
  		 */
  		pfn = find_smallest_section_pfn(nid, zone, end_pfn,
  						zone_end_pfn);
  		if (pfn) {
  			zone->zone_start_pfn = pfn;
  			zone->spanned_pages = zone_end_pfn - pfn;
  		}
  	} else if (zone_end_pfn == end_pfn) {
  		/*
		 * If the section is the biggest section in the zone, it needs
		 * to shrink zone->spanned_pages.
		 * In this case, we find the second biggest valid mem_section
		 * for shrinking the zone.
  		 */
  		pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
  					       start_pfn);
  		if (pfn)
  			zone->spanned_pages = pfn - zone_start_pfn + 1;
  	}
  
  	/*
	 * The section is neither the biggest nor the smallest mem_section
	 * in the zone; it only creates a hole in the zone. So in this case,
	 * we need not change the zone. But the zone might now contain only
	 * holes, so check whether it still has any valid section.
  	 */
  	pfn = zone_start_pfn;
  	for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (page_zone(pfn_to_page(pfn)) != zone)
  			continue;
  
		 /* If this is the section being removed, continue the loop */
  		if (start_pfn == pfn)
  			continue;
  
  		/* If we find valid section, we have nothing to do */
  		zone_span_writeunlock(zone);
  		return;
  	}
  
  	/* The zone has no valid section */
  	zone->zone_start_pfn = 0;
  	zone->spanned_pages = 0;
  	zone_span_writeunlock(zone);
  }
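
/*
 * Worked example: if the offlined section sat at the bottom of the zone,
 * zone_start_pfn moves up to the next valid section found by
 * find_smallest_section_pfn(); at the top, only spanned_pages shrinks; a
 * removal strictly in the middle just punches a hole and the span is left
 * untouched.
 */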
  
  static void shrink_pgdat_span(struct pglist_data *pgdat,
  			      unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
  	unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
  	unsigned long pgdat_end_pfn = p;
  	unsigned long pfn;
  	struct mem_section *ms;
  	int nid = pgdat->node_id;
  
  	if (pgdat_start_pfn == start_pfn) {
  		/*
		 * If the section is the smallest section in the pgdat, it needs
		 * to shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
		 * In this case, we find the second smallest valid mem_section
		 * for shrinking the pgdat.
  		 */
  		pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
  						pgdat_end_pfn);
  		if (pfn) {
  			pgdat->node_start_pfn = pfn;
  			pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
  		}
  	} else if (pgdat_end_pfn == end_pfn) {
  		/*
		 * If the section is the biggest section in the pgdat, it needs
		 * to shrink pgdat->node_spanned_pages.
		 * In this case, we find the second biggest valid mem_section
		 * for shrinking the pgdat.
  		 */
  		pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
  					       start_pfn);
  		if (pfn)
  			pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
  	}
  
  	/*
	 * If the section is neither the biggest nor the smallest mem_section
	 * in the pgdat, it only creates a hole in the pgdat. So in this case,
	 * we need not change the pgdat.
	 * But the pgdat might now contain only holes, so check whether it
	 * still has any valid section.
  	 */
  	pfn = pgdat_start_pfn;
  	for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (pfn_to_nid(pfn) != nid)
  			continue;
  
		 /* If this is the section being removed, continue the loop */
  		if (start_pfn == pfn)
  			continue;
  
  		/* If we find valid section, we have nothing to do */
  		return;
  	}
  
  	/* The pgdat has no valid section */
  	pgdat->node_start_pfn = 0;
  	pgdat->node_spanned_pages = 0;
  }
  
  static void __remove_zone(struct zone *zone, unsigned long start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nr_pages = PAGES_PER_SECTION;
  	int zone_type;
  	unsigned long flags;
  
  	zone_type = zone - pgdat->node_zones;
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
  	shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
  }

static int __remove_section(struct zone *zone, struct mem_section *ms)
{
	unsigned long start_pfn;
	int scn_nr;
	int ret = -EINVAL;

	if (!valid_section(ms))
		return ret;

	ret = unregister_memory_section(ms);
	if (ret)
		return ret;

	scn_nr = __section_nr(ms);
	start_pfn = section_nr_to_pfn(scn_nr);
	__remove_zone(zone, start_pfn);

	sparse_remove_one_section(zone, ms);
	return 0;
}

/**
   * __remove_pages() - remove sections of pages from a zone
   * @zone: zone from which pages need to be removed
   * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
   * @nr_pages: number of pages to remove (must be multiple of section size)
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
 * sure that pages are marked reserved and zones are adjusted properly by
   * calling offline_pages().
   */
  int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
	unsigned long i;
	int sections_to_remove;
	resource_size_t start, size;
	int ret = 0;
  
  	/*
  	 * We can only remove entire sections
  	 */
  	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
  	BUG_ON(nr_pages % PAGES_PER_SECTION);

	start = phys_start_pfn << PAGE_SHIFT;
	size = nr_pages * PAGE_SIZE;
	ret = release_mem_region_adjustable(&iomem_resource, start, size);
	if (ret) {
		resource_size_t endres = start + size - 1;

		pr_warn("Unable to release resource <%pa-%pa> (%d)\n",
				&start, &endres, ret);
	}

  	sections_to_remove = nr_pages / PAGES_PER_SECTION;
  	for (i = 0; i < sections_to_remove; i++) {
  		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
  		ret = __remove_section(zone, __pfn_to_section(pfn));
  		if (ret)
  			break;
  	}
  	return ret;
  }
  EXPORT_SYMBOL_GPL(__remove_pages);
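
/*
 * Caller sketch: the hot-remove path reaches this through the arch code,
 * e.g. remove_memory() -> arch_remove_memory() -> __remove_pages(), with
 * the range already taken down by offline_pages() as required above.
 */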
#endif /* CONFIG_MEMORY_HOTREMOVE */

  int set_online_page_callback(online_page_callback_t callback)
  {
  	int rc = -EINVAL;
  
  	lock_memory_hotplug();
  
  	if (online_page_callback == generic_online_page) {
  		online_page_callback = callback;
  		rc = 0;
  	}
  
  	unlock_memory_hotplug();
  
  	return rc;
  }
  EXPORT_SYMBOL_GPL(set_online_page_callback);
  
  int restore_online_page_callback(online_page_callback_t callback)
  {
  	int rc = -EINVAL;
  
  	lock_memory_hotplug();
  
  	if (online_page_callback == callback) {
  		online_page_callback = generic_online_page;
  		rc = 0;
  	}
  
  	unlock_memory_hotplug();
  
  	return rc;
  }
  EXPORT_SYMBOL_GPL(restore_online_page_callback);
  
  void __online_page_set_limits(struct page *page)
{
}
EXPORT_SYMBOL_GPL(__online_page_set_limits);

void __online_page_increment_counters(struct page *page)
{
	adjust_managed_page_count(page, 1);
}
EXPORT_SYMBOL_GPL(__online_page_increment_counters);

void __online_page_free(struct page *page)
{
	__free_reserved_page(page);
}
  EXPORT_SYMBOL_GPL(__online_page_free);
  
  static void generic_online_page(struct page *page)
  {
  	__online_page_set_limits(page);
  	__online_page_increment_counters(page);
  	__online_page_free(page);
  }
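
/*
 * generic_online_page() is the default online_page_callback: it runs the
 * three __online_page_* helpers in order and ends by handing the page to
 * the buddy allocator. A driver callback installed with
 * set_online_page_callback() replaces all three steps at once.
 */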

static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
{
	unsigned long i;
	unsigned long onlined_pages = *(unsigned long *)arg;
	struct page *page;
	if (PageReserved(pfn_to_page(start_pfn)))
		for (i = 0; i < nr_pages; i++) {
			page = pfn_to_page(start_pfn + i);
			(*online_page_callback)(page);
			onlined_pages++;
		}
	*(unsigned long *)arg = onlined_pages;
	return 0;
}

#ifdef CONFIG_MOVABLE_NODE
/*
 * When CONFIG_MOVABLE_NODE, we permit onlining of a node which doesn't have
 * normal memory.
 */
static bool can_online_high_movable(struct zone *zone)
{
	return true;
}
#else /* CONFIG_MOVABLE_NODE */
/* ensure every online node has NORMAL memory */
static bool can_online_high_movable(struct zone *zone)
{
	return node_state(zone_to_nid(zone), N_NORMAL_MEMORY);
}
#endif /* CONFIG_MOVABLE_NODE */

  /* check which state of node_states will be changed when online memory */
  static void node_states_check_changes_online(unsigned long nr_pages,
  	struct zone *zone, struct memory_notify *arg)
  {
  	int nid = zone_to_nid(zone);
  	enum zone_type zone_last = ZONE_NORMAL;
  
  	/*
	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
	 * contains nodes which have zones of 0...ZONE_NORMAL,
	 * set zone_last to ZONE_NORMAL.
	 *
	 * If we don't have HIGHMEM nor movable node,
	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
	 */
	if (N_MEMORY == N_NORMAL_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	/*
  	 * if the memory to be online is in a zone of 0...zone_last, and
  	 * the zones of 0...zone_last don't have memory before online, we will
  	 * need to set the node to node_states[N_NORMAL_MEMORY] after
  	 * the memory is online.
  	 */
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_NORMAL_MEMORY))
  		arg->status_change_nid_normal = nid;
  	else
  		arg->status_change_nid_normal = -1;

#ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE,
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_HIGH_MEMORY))
  		arg->status_change_nid_high = nid;
  	else
  		arg->status_change_nid_high = -1;
  #else
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif

	/*
	 * if the node doesn't have memory before online, we will need to
	 * set the node to node_states[N_MEMORY] after the memory
	 * is online.
	 */
  	if (!node_state(nid, N_MEMORY))
		arg->status_change_nid = nid;
	else
		arg->status_change_nid = -1;
}

static void node_states_set_node(int node, struct memory_notify *arg)
{
	if (arg->status_change_nid_normal >= 0)
		node_set_state(node, N_NORMAL_MEMORY);
	if (arg->status_change_nid_high >= 0)
		node_set_state(node, N_HIGH_MEMORY);

	node_set_state(node, N_MEMORY);
}

int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
{
	unsigned long flags;
	unsigned long onlined_pages = 0;
	struct zone *zone;
	int need_zonelists_rebuild = 0;
	int nid;
	int ret;
	struct memory_notify arg;

	lock_memory_hotplug();
	/*
	 * This doesn't need a lock to do pfn_to_page().
	 * The section can't be removed here because of the
	 * memory_block->state_mutex.
	 */
	zone = page_zone(pfn_to_page(pfn));

	if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
	    !can_online_high_movable(zone)) {
		unlock_memory_hotplug();
		return -EINVAL;
	}

	if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
			unlock_memory_hotplug();
			return -EINVAL;
		}
	}
	if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
			unlock_memory_hotplug();
			return -EINVAL;
		}
	}

	/* Previous code may have changed the zone of the pfn range */
	zone = page_zone(pfn_to_page(pfn));
	arg.start_pfn = pfn;
	arg.nr_pages = nr_pages;
	node_states_check_changes_online(nr_pages, zone, &arg);

	nid = pfn_to_nid(pfn);

  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret) {
  		memory_notify(MEM_CANCEL_ONLINE, &arg);
		unlock_memory_hotplug();
		return ret;
	}
	/*
	 * If this zone is not populated, then it is not in zonelist.
	 * This means the page allocator ignores this zone.
	 * So, zonelist must be updated after online.
	 */
	mutex_lock(&zonelists_mutex);
	if (!populated_zone(zone)) {
		need_zonelists_rebuild = 1;
		build_all_zonelists(NULL, zone);
	}

  	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
		online_pages_range);
	if (ret) {
		if (need_zonelists_rebuild)
			zone_pcp_reset(zone);
		mutex_unlock(&zonelists_mutex);
		printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] failed\n",
		       (unsigned long long) pfn << PAGE_SHIFT,
		       (((unsigned long long) pfn + nr_pages)
			    << PAGE_SHIFT) - 1);
		memory_notify(MEM_CANCEL_ONLINE, &arg);
		unlock_memory_hotplug();
		return ret;
	}

	zone->present_pages += onlined_pages;

	pgdat_resize_lock(zone->zone_pgdat, &flags);
	zone->zone_pgdat->node_present_pages += onlined_pages;
	pgdat_resize_unlock(zone->zone_pgdat, &flags);
	if (onlined_pages) {
		node_states_set_node(zone_to_nid(zone), &arg);
		if (need_zonelists_rebuild)
			build_all_zonelists(NULL, NULL);
		else
			zone_pcp_update(zone);
	}

	mutex_unlock(&zonelists_mutex);

	init_per_zone_wmark_min();

	if (onlined_pages)
		kswapd_run(zone_to_nid(zone));

	vm_total_pages = nr_free_pagecache_pages();

	writeback_set_ratelimit();

	if (onlined_pages)
		memory_notify(MEM_ONLINE, &arg);
	unlock_memory_hotplug();

	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */

  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
	struct pglist_data *pgdat;
	unsigned long zones_size[MAX_NR_ZONES] = {0};
	unsigned long zholes_size[MAX_NR_ZONES] = {0};
	unsigned long start_pfn = start >> PAGE_SHIFT;

	pgdat = NODE_DATA(nid);
	if (!pgdat) {
		pgdat = arch_alloc_nodedata(nid);
		if (!pgdat)
			return NULL;

		arch_refresh_nodedata(nid, pgdat);
	}

	/* we can use NODE_DATA(nid) from here */

	/* init node's zones as empty zones, we don't have any present pages.*/
	free_area_init_node(nid, zones_size, start_pfn, zholes_size);

	/*
	 * The node we allocated has no zone fallback lists. To avoid
	 * accessing an uninitialized zonelist, build one here.
	 */
	mutex_lock(&zonelists_mutex);
	build_all_zonelists(pgdat, NULL);
	mutex_unlock(&zonelists_mutex);

  	return pgdat;
  }
  
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
  	arch_free_nodedata(pgdat);
  	return;
  }

/**
 * try_online_node - online a node if offlined
 *
 * called by cpu_up() to online a node without onlined memory.
 */
  int try_online_node(int nid)
{
	pg_data_t	*pgdat;
	int	ret;

	if (node_online(nid))
		return 0;

	lock_memory_hotplug();
	pgdat = hotadd_new_pgdat(nid, 0);
	if (!pgdat) {
		pr_err("Cannot online node %d due to NULL pgdat\n", nid);
		ret = -ENOMEM;
		goto out;
	}
	node_set_online(nid);
	ret = register_one_node(nid);
	BUG_ON(ret);

	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL, NULL);
		mutex_unlock(&zonelists_mutex);
	}

out:
	unlock_memory_hotplug();
  	return ret;
  }

static int check_hotplug_memory_range(u64 start, u64 size)
  {
  	u64 start_pfn = start >> PAGE_SHIFT;
  	u64 nr_pages = size >> PAGE_SHIFT;
  
  	/* Memory range must be aligned with section */
  	if ((start_pfn & ~PAGE_SECTION_MASK) ||
  	    (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
		pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n",
				(unsigned long long)start,
				(unsigned long long)size);
  		return -EINVAL;
  	}
  
  	return 0;
  }
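
/*
 * Example: with 128MB sections (x86_64's default, SECTION_SIZE_BITS == 27),
 * add_memory(nid, 0x40000000, 0x8000000) passes this check, while a start
 * of 0x40100000 or a size of 0x100000 would be rejected as
 * section-unaligned.
 */
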
  /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
  int __ref add_memory(int nid, u64 start, u64 size)
{
	pg_data_t *pgdat = NULL;
	bool new_pgdat;
	bool new_node;
	struct resource *res;
	int ret;

	ret = check_hotplug_memory_range(start, size);
	if (ret)
		return ret;

	res = register_memory_resource(start, size);
	ret = -EEXIST;
	if (!res)
		return ret;

	{	/* Stupid hack to suppress address-never-null warning */
		void *p = NODE_DATA(nid);
		new_pgdat = !p;
	}

	lock_memory_hotplug();

  	new_node = !node_online(nid);
  	if (new_node) {
		pgdat = hotadd_new_pgdat(nid, start);
		ret = -ENOMEM;
		if (!pgdat)
			goto error;
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size);
	if (ret < 0)
		goto error;

	/* we online node here. we can't roll back from here. */
	node_set_online(nid);
  	if (new_node) {
  		ret = register_one_node(nid);
  		/*
		 * If the sysfs file of the new node can't be created, CPUs
		 * on the node can't be hot-added. There is no way to roll
		 * back now, so check it with BUG_ON() to catch it reluctantly.
  		 */
  		BUG_ON(ret);
  	}

	/* create new memmap entry */
	firmware_map_add_hotplug(start, start + size, "System RAM");

	goto out;

  error:
  	/* rollback pgdat allocation and others */
  	if (new_pgdat)
  		rollback_node_hotadd(nid, pgdat);
	release_memory_resource(res);

out:
	unlock_memory_hotplug();
  	return ret;
  }
  EXPORT_SYMBOL_GPL(add_memory);
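
/*
 * Caller sketch: firmware-driven hotplug ends up here; e.g. the ACPI
 * memory hotplug driver calls add_memory(nid, info->start_addr,
 * info->length) for each enabled memory resource it discovers.
 */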
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
   * set and the size of the free page is given by page_order(). Using this,
   * the function determines if the pageblock contains only free pages.
 * Due to buddy constraints, a free page at least the size of a pageblock will
   * be located at the start of the pageblock
   */
  static inline int pageblock_free(struct page *page)
  {
  	return PageBuddy(page) && page_order(page) >= pageblock_order;
  }
  
  /* Return the start of the next active pageblock after a given page */
  static struct page *next_active_pageblock(struct page *page)
  {
	/* Ensure the starting page is pageblock-aligned */
	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));

	/* If the entire pageblock is free, move to the end of free page */
	if (pageblock_free(page)) {
		int order;
		/* be careful. we don't have locks, page_order can be changed.*/
		order = page_order(page);
		if ((order < MAX_ORDER) && (order >= pageblock_order))
			return page + (1 << order);
	}

	return page + pageblock_nr_pages;
}

/* Checks if this range of memory is likely to be hot-removable. */
int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
	struct page *page = pfn_to_page(start_pfn);
	struct page *end_page = page + nr_pages;

	/* Check the starting page of each pageblock within the range */
	for (; page < end_page; page = next_active_pageblock(page)) {
		if (!is_pageblock_removable_nolock(page))
			return 0;
		cond_resched();
  	}
  
  	/* All pageblocks in the memory block are likely to be hot-removable */
  	return 1;
  }
  
  /*
 * Confirm that all pages in the range [start, end) belong to the same zone.
   */
  static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct zone *zone = NULL;
  	struct page *page;
  	int i;
  	for (pfn = start_pfn;
  	     pfn < end_pfn;
  	     pfn += MAX_ORDER_NR_PAGES) {
  		i = 0;
  		/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  		while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
  			i++;
  		if (i == MAX_ORDER_NR_PAGES)
  			continue;
  		page = pfn_to_page(pfn + i);
  		if (zone && page_zone(page) != zone)
  			return 0;
  		zone = page_zone(page);
  	}
  	return 1;
  }
  
  /*
 * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
 * and hugepages). We scan pfn because it's much easier than scanning over
 * linked list. This function returns the pfn of the first found movable
 * page if it's found, otherwise 0.
 */
  static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
	unsigned long pfn;
	struct page *page;
	for (pfn = start; pfn < end; pfn++) {
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageLRU(page))
				return pfn;
			if (PageHuge(page)) {
				if (is_hugepage_active(page))
					return pfn;
				else
					pfn = round_up(pfn + 1,
						1 << compound_order(page)) - 1;
			}
  		}
  	}
  	return 0;
  }
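
/*
 * Note: the round_up() above jumps pfn to the last tail page of the
 * hugepage, so the loop's pfn++ resumes at the next hugepage boundary
 * instead of rescanning tail pages one by one.
 */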

#define NR_OFFLINE_AT_ONCE_PAGES	(256)
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
  	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
  	int not_managed = 0;
  	int ret = 0;
  	LIST_HEAD(source);
  
  	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
  
  		if (PageHuge(page)) {
  			struct page *head = compound_head(page);
  			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
  			if (compound_order(head) > PFN_SECTION_SHIFT) {
  				ret = -EBUSY;
  				break;
  			}
  			if (isolate_huge_page(page, &source))
  				move_pages -= 1 << compound_order(head);
  			continue;
  		}
  		if (!get_page_unless_zero(page))
  			continue;
  		/*
  		 * We can skip free pages, and we can only deal with pages
  		 * on the LRU.
  		 */
  		ret = isolate_lru_page(page);
  		if (!ret) { /* Success */
  			put_page(page);
  			list_add_tail(&page->lru, &source);
  			move_pages--;
  			inc_zone_page_state(page, NR_ISOLATED_ANON +
  					    page_is_file_cache(page));
  		} else {
  #ifdef CONFIG_DEBUG_VM
  			printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
  			       pfn);
  			dump_page(page, "failed to remove from LRU");
  #endif
  			put_page(page);
  			/* Because we don't hold the big zone->lock, we
  			   should check the page count again here. */
  			if (page_count(page)) {
  				not_managed++;
  				ret = -EBUSY;
  				break;
  			}
  		}
  	}
  	if (!list_empty(&source)) {
  		if (not_managed) {
  			putback_movable_pages(&source);
  			goto out;
  		}
  
  		/*
  		 * alloc_migrate_target should be improved!
  		 * migrate_pages returns # of failed pages.
  		 */
  		ret = migrate_pages(&source, alloc_migrate_target, 0,
  					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
  		if (ret)
  			putback_movable_pages(&source);
  	}
  out:
  	return ret;
  }
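  
  /*
   * Usage sketch (hypothetical helper, mirroring the caller in
   * __offline_pages() below): since at most NR_OFFLINE_AT_ONCE_PAGES pages
   * are isolated per call, a caller loops until the range holds no more
   * movable pages:
   */
  static int __maybe_unused example_drain_range(unsigned long start_pfn,
  					      unsigned long end_pfn)
  {
  	unsigned long pfn;
  	int ret = 0;
  
  	while ((pfn = scan_movable_pages(start_pfn, end_pfn))) {
  		ret = do_migrate_range(pfn, end_pfn);
  		if (ret < 0)
  			break;	/* the real caller also bounds its retries */
  	}
  	return ret;
  }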
  
  /*
   * remove pages from free_area[] and mark them all as Reserved.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	__offline_isolated_pages(start, start + nr_pages);
  	return 0;
  }
  
  static void
  offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
  	walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL,
  				offline_isolated_pages_cb);
  }
  
  /*
   * Check that all pages in the range, recorded as a memory resource, are isolated.
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
  	int ret;
  	long offlined = *(long *)data;
  	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
  	offlined = nr_pages;
  	if (!ret)
  		*(long *)data += offlined;
  	return ret;
  }
  
  static long
  check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
  {
  	long offlined = 0;
  	int ret;
  	ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined,
  			check_pages_isolated_cb);
  	if (ret < 0)
  		offlined = (long)ret;
  	return offlined;
  }
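  
  /*
   * Example (illustrative, assuming a 128 MiB section and 4 KiB pages):
   * offlining a single section walks one System RAM range of 32768 pfns;
   * on success check_pages_isolated() returns 32768, while any page that
   * is still referenced makes it return a negative errno instead.
   */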
  #ifdef CONFIG_MOVABLE_NODE
  /*
   * With CONFIG_MOVABLE_NODE, we permit offlining of a node which has no
   * normal memory.
   */
  static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
  {
  	return true;
  }
  #else /* CONFIG_MOVABLE_NODE */
  /* ensure the node has NORMAL memory if it is still online */
  static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long present_pages = 0;
  	enum zone_type zt;
  
  	for (zt = 0; zt <= ZONE_NORMAL; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  
  	if (present_pages > nr_pages)
  		return true;
  
  	present_pages = 0;
  	for (; zt <= ZONE_MOVABLE; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  
  	/*
  	 * We can't offline the last of the normal memory until all
  	 * higher memory is offlined.
  	 */
  	return present_pages == 0;
  }
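  
  /*
   * Worked example (illustrative): a node with 1000 present pages in
   * ZONE_DMA..ZONE_NORMAL and 2000 in ZONE_MOVABLE. Offlining 500 normal
   * pages succeeds (1000 > 500), but offlining all 1000 is refused until
   * the 2000 movable pages are offlined first, keeping N_NORMAL_MEMORY
   * consistent.
   */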
  #endif /* CONFIG_MOVABLE_NODE */

  static int __init cmdline_parse_movable_node(char *p)
  {
  #ifdef CONFIG_MOVABLE_NODE
  	/*
  	 * Memory used by the kernel cannot be hot-removed because Linux
  	 * cannot migrate the kernel pages. When memory hotplug is
  	 * enabled, we should prevent memblock from allocating memory
  	 * for the kernel.
  	 *
  	 * ACPI SRAT records all hotpluggable memory ranges. But before
  	 * SRAT is parsed, we don't know which ranges they are.
  	 *
  	 * The kernel image is loaded into memory very early. We cannot
  	 * prevent this anyway, so on NUMA systems we mark any node the
  	 * kernel resides on as un-hotpluggable.
  	 *
  	 * Since on modern servers one node can hold double-digit
  	 * gigabytes of memory, we can assume the memory around the
  	 * kernel image is also un-hotpluggable. So before SRAT is
  	 * parsed, just allocate memory near the kernel image to do our
  	 * best to keep the kernel away from hotpluggable memory.
  	 */
  	memblock_set_bottom_up(true);
  	movable_node_enabled = true;
  #else
  	pr_warn("movable_node option not supported
  ");
  #endif
  	return 0;
  }
  early_param("movable_node", cmdline_parse_movable_node);
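  
  /*
   * Usage (illustrative): enable this on the kernel command line, e.g.
   *
   *	linux /boot/vmlinuz root=/dev/sda1 movable_node
   *
   * On a CONFIG_MOVABLE_NODE=y NUMA machine this switches memblock to
   * bottom-up allocation until SRAT has marked the hotpluggable ranges.
   */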
  /* check which state of node_states will be changed when offline memory */
  static void node_states_check_changes_offline(unsigned long nr_pages,
  		struct zone *zone, struct memory_notify *arg)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long present_pages = 0;
  	enum zone_type zt, zone_last = ZONE_NORMAL;
  
  	/*
  	 * If we have HIGHMEM or a movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_NORMAL;
  	 * set zone_last to ZONE_NORMAL.
  	 *
  	 * If we have neither HIGHMEM nor a movable node,
  	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
  	 * 0...ZONE_MOVABLE; set zone_last to ZONE_MOVABLE.
  	 */
  	if (N_MEMORY == N_NORMAL_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	/*
  	 * Check whether node_states[N_NORMAL_MEMORY] will be changed.
  	 * If the memory to be offlined is in a zone of 0...zone_last,
  	 * and it is the last present memory, 0...zone_last will
  	 * become empty after offlining, thus we can determine that we
  	 * will need to clear the node from node_states[N_NORMAL_MEMORY].
  	 */
  	for (zt = 0; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
  		arg->status_change_nid_normal = zone_to_nid(zone);
  	else
  		arg->status_change_nid_normal = -1;
  #ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have a movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM;
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have a movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE;
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	for (; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
  		arg->status_change_nid_high = zone_to_nid(zone);
  	else
  		arg->status_change_nid_high = -1;
  #else
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif
  	/*
  	 * node_states[N_HIGH_MEMORY] contains nodes which have 0...ZONE_MOVABLE
  	 */
  	zone_last = ZONE_MOVABLE;
  
  	/*
  	 * Check whether node_states[N_HIGH_MEMORY] will be changed.
  	 * If we try to offline the last present @nr_pages from the node,
  	 * we can determine that we will need to clear the node from
  	 * node_states[N_HIGH_MEMORY].
  	 */
  	for (; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (nr_pages >= present_pages)
  		arg->status_change_nid = zone_to_nid(zone);
  	else
  		arg->status_change_nid = -1;
  }
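  
  /*
   * Worked example (illustrative, !CONFIG_HIGHMEM with movable nodes): a
   * node whose only memory is 512 MiB of ZONE_MOVABLE. Offlining all of it
   * leaves status_change_nid_normal == -1 (there was no normal memory to
   * lose) but sets status_change_nid to the node, so the node is cleared
   * from node_states[N_MEMORY] afterwards.
   */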
  
  static void node_states_clear_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_clear_state(node, N_NORMAL_MEMORY);
  	if ((N_MEMORY != N_NORMAL_MEMORY) &&
  	    (arg->status_change_nid_high >= 0))
  		node_clear_state(node, N_HIGH_MEMORY);
  
  	if ((N_MEMORY != N_HIGH_MEMORY) &&
  	    (arg->status_change_nid >= 0))
  		node_clear_state(node, N_MEMORY);
  }
  static int __ref __offline_pages(unsigned long start_pfn,
  		  unsigned long end_pfn, unsigned long timeout)
  {
  	unsigned long pfn, nr_pages, expire;
  	long offlined_pages;
  	int ret, drain, retry_max, node;
  	unsigned long flags;
  	struct zone *zone;
  	struct memory_notify arg;

  	/* At the very least, alignment to pageblock boundaries is necessary */
  	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	/* This makes hotplug much easier and more readable;
  	   we assume this for now. */
  	if (!test_pages_in_a_zone(start_pfn, end_pfn))
  		return -EINVAL;

  	lock_memory_hotplug();

  	zone = page_zone(pfn_to_page(start_pfn));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;
  	ret = -EINVAL;
  	if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages))
  		goto out;
  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn,
  				       MIGRATE_MOVABLE, true);
  	if (ret)
  		goto out;
  
  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	node_states_check_changes_offline(nr_pages, zone, &arg);
  
  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_removal;
  	pfn = start_pfn;
  	expire = jiffies + timeout;
  	drain = 0;
  	retry_max = 5;
  repeat:
  	/* start memory hot removal */
  	ret = -EAGAIN;
  	if (time_after(jiffies, expire))
  		goto failed_removal;
  	ret = -EINTR;
  	if (signal_pending(current))
  		goto failed_removal;
  	ret = 0;
  	if (drain) {
  		lru_add_drain_all();
  		cond_resched();
  		drain_all_pages();
  	}
  	pfn = scan_movable_pages(start_pfn, end_pfn);
  	if (pfn) { /* We have movable pages */
  		ret = do_migrate_range(pfn, end_pfn);
  		if (!ret) {
  			drain = 1;
  			goto repeat;
  		} else {
  			if (ret < 0)
  				if (--retry_max == 0)
  					goto failed_removal;
  			yield();
  			drain = 1;
  			goto repeat;
  		}
  	}
  	/* drain the LRU pagevecs of all zones; this is asynchronous */
  	lru_add_drain_all();
  	yield();
  	/* drain pcp pages, this is synchronous. */
  	drain_all_pages();
  	/*
  	 * Dissolve free hugepages in the memory block before actually
  	 * offlining, in order to keep hugetlbfs's object counting
  	 * consistent.
  	 */
  	dissolve_free_huge_pages(start_pfn, end_pfn);
  	/* check again */
  	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
  	if (offlined_pages < 0) {
  		ret = -EBUSY;
  		goto failed_removal;
  	}
  	printk(KERN_INFO "Offlined Pages %ld
  ", offlined_pages);
  	/* OK, all of our target range is isolated.
  	   We cannot roll back at this point. */
  	offline_isolated_pages(start_pfn, end_pfn);
  	/* reset pagetype flags and make the migrate type MOVABLE again */
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
  	/* removal success */
  	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
  	zone->present_pages -= offlined_pages;
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);

  	init_per_zone_wmark_min();
  	if (!populated_zone(zone)) {
  		zone_pcp_reset(zone);
  		mutex_lock(&zonelists_mutex);
  		build_all_zonelists(NULL, NULL);
  		mutex_unlock(&zonelists_mutex);
  	} else
  		zone_pcp_update(zone);

  	node_states_clear_node(node, &arg);
  	if (arg.status_change_nid >= 0)
  		kswapd_stop(node);

  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();
  
  	memory_notify(MEM_OFFLINE, &arg);
  	unlock_memory_hotplug();
  	return 0;
  
  failed_removal:
  	printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed
  ",
  	       (unsigned long long) start_pfn << PAGE_SHIFT,
  	       ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
  	/* push the isolated pages back to the free area */
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);

  out:
  	unlock_memory_hotplug();
  	return ret;
  }

  int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
  {
  	return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);
  }
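  
  /*
   * Usage (illustrative): userspace normally reaches offline_pages() through
   * the memory sysfs interface, e.g.
   *
   *	# echo offline > /sys/devices/system/memory/memory32/state
   *
   * which offlines that block's pfn range with the fixed 120 s timeout above.
   */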
  #endif /* CONFIG_MEMORY_HOTREMOVE */

  /**
   * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn)
   * @start_pfn: start pfn of the memory range
   * @end_pfn: end pfn of the memory range
   * @arg: argument passed to func
   * @func: callback for each memory section walked
   *
   * This function walks through all present mem sections in the range
   * [start_pfn, end_pfn) and calls func on each mem section.
   *
   * Returns the return value of func.
   */
  int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
  		void *arg, int (*func)(struct memory_block *, void *))
  {
  	struct memory_block *mem = NULL;
  	struct mem_section *section;
  	unsigned long pfn, section_nr;
  	int ret;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		section_nr = pfn_to_section_nr(pfn);
  		if (!present_section_nr(section_nr))
  			continue;
  
  		section = __nr_to_section(section_nr);
  		/* same memblock? */
  		if (mem)
  			if ((section_nr >= mem->start_section_nr) &&
  			    (section_nr <= mem->end_section_nr))
  				continue;
  
  		mem = find_memory_block_hinted(section, mem);
  		if (!mem)
  			continue;
  		ret = func(mem, arg);
  		if (ret) {
  			kobject_put(&mem->dev.kobj);
  			return ret;
  		}
  	}
  
  	if (mem)
  		kobject_put(&mem->dev.kobj);
  	return 0;
  }
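  
  /*
   * Usage sketch (hypothetical callback, not part of this file): counting
   * how many memory blocks back a physical range, in the same style as the
   * offline check used by remove_memory() below:
   */
  static int __maybe_unused example_count_blocks_cb(struct memory_block *mem,
  						  void *arg)
  {
  	(*(int *)arg)++;
  	return 0;		/* a nonzero return aborts the walk */
  }
  
  static int __maybe_unused example_count_blocks(unsigned long start_pfn,
  					       unsigned long end_pfn)
  {
  	int nr = 0;
  
  	walk_memory_range(start_pfn, end_pfn, &nr, example_count_blocks_cb);
  	return nr;
  }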
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
  {
  	int ret = !is_memblock_offlined(mem);
  	if (unlikely(ret)) {
  		phys_addr_t beginpa, endpa;
  
  		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
  		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
  		pr_warn("removing memory fails, because memory "
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1682
1683
1684
1685
  			"[%pa-%pa] is onlined
  ",
  			&beginpa, &endpa);
  	}
  
  	return ret;
  }
  static int check_cpu_on_node(pg_data_t *pgdat)
  {
  	int cpu;
  
  	for_each_present_cpu(cpu) {
  		if (cpu_to_node(cpu) == pgdat->node_id)
  			/*
  			 * A CPU on this node hasn't been removed, so we
  			 * can't offline this node.
  			 */
  			return -EBUSY;
  	}
  
  	return 0;
  }
  static void unmap_cpu_on_node(pg_data_t *pgdat)
  {
  #ifdef CONFIG_ACPI_NUMA
  	int cpu;
  
  	for_each_possible_cpu(cpu)
  		if (cpu_to_node(cpu) == pgdat->node_id)
  			numa_clear_node(cpu);
  #endif
  }
  static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
  {
  	int ret;

  	ret = check_cpu_on_node(pgdat);
  	if (ret)
  		return ret;
  
  	/*
  	 * The node will be offlined when we get here, so we can clear
  	 * its cpu_to_node() mapping now.
  	 */
  	unmap_cpu_on_node(pgdat);
  	return 0;
  }
  /**
   * try_offline_node
   *
   * Offline a node if all memory sections and cpus of the node are removed.
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call.
   */
  void try_offline_node(int nid)
  {
  	pg_data_t *pgdat = NODE_DATA(nid);
  	unsigned long start_pfn = pgdat->node_start_pfn;
  	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
  	unsigned long pfn;
  	struct page *pgdat_page = virt_to_page(pgdat);
  	int i;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		unsigned long section_nr = pfn_to_section_nr(pfn);
  
  		if (!present_section_nr(section_nr))
  			continue;
  
  		if (pfn_to_nid(pfn) != nid)
  			continue;
  
  		/*
  		 * Some memory sections of this node haven't been removed,
  		 * so we can't offline the node now.
  		 */
  		return;
  	}
  	if (check_and_unmap_cpu_on_node(pgdat))
  		return;
  
  	/*
  	 * All memory and CPUs of this node have been removed; we can
  	 * offline the node now.
  	 */
  	node_set_offline(nid);
  	unregister_one_node(nid);
  
  	if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
  		/* node data is allocated from boot memory */
  		return;
  
  	/* free waittable in each zone */
  	for (i = 0; i < MAX_NR_ZONES; i++) {
  		struct zone *zone = pgdat->node_zones + i;
  		/*
  		 * wait_table may have been allocated from boot memory;
  		 * only free it here if it was allocated with vmalloc.
  		 */
  		if (is_vmalloc_addr(zone->wait_table))
  			vfree(zone->wait_table);
  	}
  
  	/*
  	 * Since there is no way to guarantee that the address of pgdat/zone
  	 * is not on the stack of any kernel thread or used by other kernel
  	 * objects without reference counting or another synchronizing
  	 * method, do not reset node_data and free pgdat here. Just reset it
  	 * to 0 and reuse the memory when the node is online again.
  	 */
  	memset(pgdat, 0, sizeof(*pgdat));
  }
  EXPORT_SYMBOL(try_offline_node);

  /**
   * remove_memory
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call, as required by
   * try_offline_node().
   */
  void __ref remove_memory(int nid, u64 start, u64 size)
  {
  	int ret;

  	BUG_ON(check_hotplug_memory_range(start, size));
  	lock_memory_hotplug();
  
  	/*
  	 * All memory blocks must be offlined before removing memory.  Check
  	 * whether all memory blocks in question are offline and trigger a BUG()
  	 * if this is not the case.
  	 */
  	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
  				check_memblock_offlined_cb);
  	if (ret) {
  		unlock_memory_hotplug();
  		BUG();
  	}
  	/* remove memmap entry */
  	firmware_map_remove(start, start + size, "System RAM");
  	arch_remove_memory(start, size);
  	try_offline_node(nid);
  	unlock_memory_hotplug();
  }
  EXPORT_SYMBOL_GPL(remove_memory);
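  
  /*
   * Usage sketch (illustrative): per the NOTE above, a caller such as the
   * ACPI memory-device driver offlines the affected memory blocks first and
   * then does roughly:
   *
   *	lock_device_hotplug();
   *	remove_memory(nid, start, size);
   *	unlock_device_hotplug();
   */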
  #endif /* CONFIG_MEMORY_HOTREMOVE */