Blame view

mm/memory_hotplug.c 49.4 KB
3947be196   Dave Hansen   [PATCH] memory ho...
1
2
3
4
5
  /*
   *  linux/mm/memory_hotplug.c
   *
   *  Copyright (C)
   */
3947be196   Dave Hansen   [PATCH] memory ho...
6
7
  #include <linux/stddef.h>
  #include <linux/mm.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
8
  #include <linux/sched/signal.h>
3947be196   Dave Hansen   [PATCH] memory ho...
9
10
11
  #include <linux/swap.h>
  #include <linux/interrupt.h>
  #include <linux/pagemap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
12
  #include <linux/compiler.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
13
  #include <linux/export.h>
3947be196   Dave Hansen   [PATCH] memory ho...
14
  #include <linux/pagevec.h>
2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
15
  #include <linux/writeback.h>
3947be196   Dave Hansen   [PATCH] memory ho...
16
17
18
19
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/cpu.h>
  #include <linux/memory.h>
4b94ffdc4   Dan Williams   x86, mm: introduc...
20
  #include <linux/memremap.h>
3947be196   Dave Hansen   [PATCH] memory ho...
21
22
23
  #include <linux/memory_hotplug.h>
  #include <linux/highmem.h>
  #include <linux/vmalloc.h>
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
24
  #include <linux/ioport.h>
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
25
26
27
  #include <linux/delay.h>
  #include <linux/migrate.h>
  #include <linux/page-isolation.h>
71088785c   Badari Pulavarty   mm: cleanup to ma...
28
  #include <linux/pfn.h>
6ad696d2c   Andi Kleen   mm: allow memory ...
29
  #include <linux/suspend.h>
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
30
  #include <linux/mm_inline.h>
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
31
  #include <linux/firmware-map.h>
60a5a19e7   Tang Chen   memory-hotplug: r...
32
  #include <linux/stop_machine.h>
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
33
  #include <linux/hugetlb.h>
c5320926e   Tang Chen   mem-hotplug: intr...
34
  #include <linux/memblock.h>
f784a3f19   Tang Chen   mem-hotplug: rese...
35
  #include <linux/bootmem.h>
698b1b306   Vlastimil Babka   mm, compaction: i...
36
  #include <linux/compaction.h>
3947be196   Dave Hansen   [PATCH] memory ho...
37
38
  
  #include <asm/tlbflush.h>
1e5ad9a3b   Adrian Bunk   mm/memory_hotplug...
39
  #include "internal.h"
9d0ad8ca4   Daniel Kiper   mm: extend memory...
40
41
42
43
44
45
46
47
48
49
  /*
   * online_page_callback points to the function currently used to online a
   * page. It starts out as generic_online_page(). A different callback can be
   * registered with set_online_page_callback(), and the generic one can be
   * reinstated with restore_online_page_callback().
   */
  
  static void generic_online_page(struct page *page);
  
  static online_page_callback_t online_page_callback = generic_online_page;
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
50
  static DEFINE_MUTEX(online_page_callback_lock);
9d0ad8ca4   Daniel Kiper   mm: extend memory...
51

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
52
  DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
53

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
54
55
56
57
  /*
   * Take mem_hotplug_lock for reading (percpu_down_read). The matching release
   * is put_online_mems(); mem_hotplug_begin() takes the same lock for writing.
   */
  void get_online_mems(void)
  {
  	percpu_down_read(&mem_hotplug_lock);
  }
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
58

3f906ba23   Thomas Gleixner   mm/memory-hotplug...
59
60
61
62
  /*
   * Drop the read side of mem_hotplug_lock (percpu_up_read) that was taken by
   * get_online_mems().
   */
  void put_online_mems(void)
  {
  	percpu_up_read(&mem_hotplug_lock);
  }
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
63

4932381ee   Michal Hocko   mm, memory_hotplu...
64
  bool movable_node_enabled = false;
8604d9e53   Vitaly Kuznetsov   memory_hotplug: i...
65
  #ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
66
  bool memhp_auto_online;
8604d9e53   Vitaly Kuznetsov   memory_hotplug: i...
67
68
69
  #else
  bool memhp_auto_online = true;
  #endif
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
70
  EXPORT_SYMBOL_GPL(memhp_auto_online);
86dd995d6   Vitaly Kuznetsov   memory_hotplug: i...
71
72
73
74
75
76
77
78
79
80
  /*
   * Handler for the "memhp_default_state=" option: "online" sets
   * memhp_auto_online, "offline" clears it, and any other value leaves it
   * untouched. Returns 1 in every case.
   */
  static int __init setup_memhp_default_state(char *str)
  {
  	const bool want_online = strcmp(str, "online") == 0;
  	const bool want_offline = !want_online && strcmp(str, "offline") == 0;

  	/* Only one of the two recognised keywords changes the setting. */
  	if (want_online || want_offline)
  		memhp_auto_online = want_online;

  	return 1;
  }
  __setup("memhp_default_state=", setup_memhp_default_state);
30467e0b3   David Rientjes   mm, hotplug: fix ...
81
  void mem_hotplug_begin(void)
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
82
  {
3f906ba23   Thomas Gleixner   mm/memory-hotplug...
83
84
  	cpus_read_lock();
  	percpu_down_write(&mem_hotplug_lock);
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
85
  }
30467e0b3   David Rientjes   mm, hotplug: fix ...
86
  void mem_hotplug_done(void)
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
87
  {
3f906ba23   Thomas Gleixner   mm/memory-hotplug...
88
89
  	percpu_up_write(&mem_hotplug_lock);
  	cpus_read_unlock();
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
90
  }
20d6c96b5   KOSAKI Motohiro   mem-hotplug: intr...
91

45e0b78b0   Keith Mannthey   [PATCH] hot-add-m...
92
93
94
  /*
   * Add the range [start, start + size) to iomem_resource as a busy
   * "System RAM" resource.
   *
   * Returns the newly allocated resource on success, ERR_PTR(-ENOMEM) when the
   * allocation fails, or ERR_PTR(-EEXIST) when request_resource_conflict()
   * reports a conflicting resource (the new resource is freed in that case).
   */
  static struct resource *register_memory_resource(u64 start, u64 size)
  {
  	struct resource *res, *conflict;

  	res = kzalloc(sizeof(*res), GFP_KERNEL);
  	if (!res)
  		return ERR_PTR(-ENOMEM);

  	res->name = "System RAM";
  	res->start = start;
  	res->end = start + size - 1;
  	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;

  	conflict = request_resource_conflict(&iomem_resource, res);
  	if (conflict) {
  		/* Give an extra hint when the conflict is device private memory. */
  		if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) {
  			pr_debug("Device unaddressable memory block "
  				 "memory hotplug at %#010llx !\n",
  				 (unsigned long long)start);
  		}
  		pr_debug("System RAM resource %pR cannot be added\n", res);
  		kfree(res);
  		return ERR_PTR(-EEXIST);
  	}
  	return res;
  }
  
  /* Release @res from the resource tree and free it; a NULL @res is ignored. */
  static void release_memory_resource(struct resource *res)
  {
  	if (res) {
  		release_resource(res);
  		kfree(res);
  	}
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
128
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
129
130
  void get_page_bootmem(unsigned long info,  struct page *page,
  		      unsigned long type)
047532787   Yasunori Goto   memory hotplug: r...
131
  {
ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
132
  	page->freelist = (void *)type;
047532787   Yasunori Goto   memory hotplug: r...
133
134
  	SetPagePrivate(page);
  	set_page_private(page, info);
fe896d187   Joonsoo Kim   mm: introduce pag...
135
  	page_ref_inc(page);
047532787   Yasunori Goto   memory hotplug: r...
136
  }
170a5a7eb   Jiang Liu   mm: make __free_p...
137
  void put_page_bootmem(struct page *page)
047532787   Yasunori Goto   memory hotplug: r...
138
  {
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
139
  	unsigned long type;
047532787   Yasunori Goto   memory hotplug: r...
140

ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
141
  	type = (unsigned long) page->freelist;
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
142
143
  	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
  	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
047532787   Yasunori Goto   memory hotplug: r...
144

fe896d187   Joonsoo Kim   mm: introduce pag...
145
  	if (page_ref_dec_return(page) == 1) {
ddffe98d1   Yasuaki Ishimatsu   mm/memory_hotplug...
146
  		page->freelist = NULL;
047532787   Yasunori Goto   memory hotplug: r...
147
148
  		ClearPagePrivate(page);
  		set_page_private(page, 0);
5f24ce5fd   Andrea Arcangeli   thp: remove PG_buddy
149
  		INIT_LIST_HEAD(&page->lru);
170a5a7eb   Jiang Liu   mm: make __free_p...
150
  		free_reserved_page(page);
047532787   Yasunori Goto   memory hotplug: r...
151
  	}
047532787   Yasunori Goto   memory hotplug: r...
152
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
153
154
  #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
  #ifndef CONFIG_SPARSEMEM_VMEMMAP
d92bc3185   Adrian Bunk   mm: make register...
155
  static void register_page_bootmem_info_section(unsigned long start_pfn)
047532787   Yasunori Goto   memory hotplug: r...
156
157
158
159
  {
  	unsigned long *usemap, mapsize, section_nr, i;
  	struct mem_section *ms;
  	struct page *page, *memmap;
047532787   Yasunori Goto   memory hotplug: r...
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);
  
  	/* Get section's memmap address */
  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  
  	/*
  	 * Get page for the memmap's phys address
  	 * XXX: need more consideration for sparse_vmemmap...
  	 */
  	page = virt_to_page(memmap);
  	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
  	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
  
  	/* remember memmap's page */
  	for (i = 0; i < mapsize; i++, page++)
  		get_page_bootmem(section_nr, page, SECTION_INFO);
  
  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  
  	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
  
  	for (i = 0; i < mapsize; i++, page++)
af370fb8c   Yasunori Goto   memory hotplug: s...
184
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
047532787   Yasunori Goto   memory hotplug: r...
185
186
  
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
  #else /* CONFIG_SPARSEMEM_VMEMMAP */
  /*
   * CONFIG_SPARSEMEM_VMEMMAP variant: register the memmap of the section
   * containing @start_pfn through register_page_bootmem_memmap(), then
   * register the pages holding its pageblock flags via get_page_bootmem().
   * Does nothing when @start_pfn is not a valid pfn.
   */
  static void register_page_bootmem_info_section(unsigned long start_pfn)
  {
  	unsigned long section_nr, nr_usemap_pages, idx;
  	unsigned long *usemap;
  	struct mem_section *ms;
  	struct page *memmap, *page;

  	if (!pfn_valid(start_pfn))
  		return;

  	section_nr = pfn_to_section_nr(start_pfn);
  	ms = __nr_to_section(section_nr);

  	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
  	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);

  	usemap = __nr_to_section(section_nr)->pageblock_flags;
  	page = virt_to_page(usemap);
  	nr_usemap_pages = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;

  	for (idx = 0; idx < nr_usemap_pages; idx++) {
  		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
  		page++;
  	}
  }
  #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
047532787   Yasunori Goto   memory hotplug: r...
213

7ded384a1   Linus Torvalds   mm: fix section m...
214
  /*
   * Register bootmem info for node @pgdat: first the pages that hold the
   * pglist_data structure itself (NODE_INFO), then every section in the node's
   * pfn range that is valid and belongs to this node.
   */
  void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
  {
  	int node = pgdat->node_id;
  	unsigned long nr_pgdat_pages, idx;
  	unsigned long pfn, end_pfn;
  	struct page *page;

  	nr_pgdat_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
  	page = virt_to_page(pgdat);

  	for (idx = 0; idx < nr_pgdat_pages; idx++) {
  		get_page_bootmem(node, page, NODE_INFO);
  		page++;
  	}

  	end_pfn = pgdat_end_pfn(pgdat);

  	/* register section info */
  	for (pfn = pgdat->node_start_pfn; pfn < end_pfn;
  	     pfn += PAGES_PER_SECTION) {
  		if (!pfn_valid(pfn))
  			continue;

  		/*
  		 * Some platforms can assign the same pfn to multiple nodes - on
  		 * node0 as well as nodeN.  To avoid registering a pfn against
  		 * multiple nodes we check that this pfn does not already
  		 * reside in some other nodes.
  		 */
  		if (early_pfn_to_nid(pfn) != node)
  			continue;

  		register_page_bootmem_info_section(pfn);
  	}
  }
46723bfa5   Yasuaki Ishimatsu   memory-hotplug: i...
240
  #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
047532787   Yasunori Goto   memory hotplug: r...
241

f1dd2cd13   Michal Hocko   mm, memory_hotplu...
242
243
  /*
   * Add the section starting at @phys_start_pfn to node @nid.
   *
   * Returns -EEXIST if @phys_start_pfn is already a valid pfn, a negative
   * error from sparse_add_one_section(), 0 when @want_memblock is false, and
   * otherwise the result of register_new_memory().
   */
  static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  		bool want_memblock)
  {
  	unsigned long offset;
  	int err;

  	if (pfn_valid(phys_start_pfn))
  		return -EEXIST;

  	err = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
  	if (err < 0)
  		return err;

  	/*
  	 * Make all the pages reserved so that nobody will stumble over half
  	 * initialized state.
  	 * FIXME: We also have to associate it with a node because pfn_to_node
  	 * relies on having page with the proper node.
  	 */
  	for (offset = 0; offset < PAGES_PER_SECTION; offset++) {
  		unsigned long pfn = phys_start_pfn + offset;

  		if (pfn_valid(pfn)) {
  			struct page *page = pfn_to_page(pfn);

  			set_page_node(page, nid);
  			SetPageReserved(page);
  		}
  	}

  	if (want_memblock)
  		return register_new_memory(nid,
  					   __pfn_to_section(phys_start_pfn));

  	return 0;
  }
4edd7ceff   David Rientjes   mm, hotplug: avoi...
274
275
276
277
278
279
  /*
   * Reasonably generic function for adding memory.  It is
   * expected that archs that support memory hotplug will
   * call this function after deciding the zone to which to
   * add the new pages.
   */
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
280
  int __ref __add_pages(int nid, unsigned long phys_start_pfn,
1b862aecf   Michal Hocko   mm, memory_hotplu...
281
  			unsigned long nr_pages, bool want_memblock)
4edd7ceff   David Rientjes   mm, hotplug: avoi...
282
283
284
285
  {
  	unsigned long i;
  	int err = 0;
  	int start_sec, end_sec;
4b94ffdc4   Dan Williams   x86, mm: introduc...
286
  	struct vmem_altmap *altmap;
4edd7ceff   David Rientjes   mm, hotplug: avoi...
287
288
289
  	/* during initialize mem_map, align hot-added range to section */
  	start_sec = pfn_to_section_nr(phys_start_pfn);
  	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
4b94ffdc4   Dan Williams   x86, mm: introduc...
290
291
292
293
294
295
296
297
298
  	altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
  	if (altmap) {
  		/*
  		 * Validate altmap is within bounds of the total request
  		 */
  		if (altmap->base_pfn != phys_start_pfn
  				|| vmem_altmap_offset(altmap) > nr_pages) {
  			pr_warn_once("memory add fail, invalid altmap
  ");
7cf91a98e   Joonsoo Kim   mm/compaction: sp...
299
300
  			err = -EINVAL;
  			goto out;
4b94ffdc4   Dan Williams   x86, mm: introduc...
301
302
303
  		}
  		altmap->alloc = 0;
  	}
4edd7ceff   David Rientjes   mm, hotplug: avoi...
304
  	for (i = start_sec; i <= end_sec; i++) {
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
305
  		err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
4edd7ceff   David Rientjes   mm, hotplug: avoi...
306
307
308
309
310
311
312
313
314
  
  		/*
  		 * EEXIST is finally dealt with by ioresource collision
  		 * check. see add_memory() => register_memory_resource()
  		 * Warning will be printed if there is collision.
  		 */
  		if (err && (err != -EEXIST))
  			break;
  		err = 0;
f64ac5e6e   Michal Hocko   mm, memory_hotplu...
315
  		cond_resched();
4edd7ceff   David Rientjes   mm, hotplug: avoi...
316
  	}
c435a3905   Zhu Guihua   mm/memory hotplug...
317
  	vmemmap_populate_print_last();
7cf91a98e   Joonsoo Kim   mm/compaction: sp...
318
  out:
4edd7ceff   David Rientjes   mm, hotplug: avoi...
319
320
321
322
323
  	return err;
  }
  EXPORT_SYMBOL_GPL(__add_pages);
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
324
  /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
d09b0137d   YASUAKI ISHIMATSU   mm/memory_hotplug...
325
  static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
  				     unsigned long start_pfn,
  				     unsigned long end_pfn)
  {
  	struct mem_section *ms;
  
  	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) {
  		ms = __pfn_to_section(start_pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(start_pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(start_pfn)))
  			continue;
  
  		return start_pfn;
  	}
  
  	return 0;
  }
  
  /* find the biggest valid pfn in the range [start_pfn, end_pfn). */
d09b0137d   YASUAKI ISHIMATSU   mm/memory_hotplug...
350
  static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
  				    unsigned long start_pfn,
  				    unsigned long end_pfn)
  {
  	struct mem_section *ms;
  	unsigned long pfn;
  
  	/* pfn is the end pfn of a memory section. */
  	pfn = end_pfn - 1;
  	for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) {
  		ms = __pfn_to_section(pfn);
  
  		if (unlikely(!valid_section(ms)))
  			continue;
  
  		if (unlikely(pfn_to_nid(pfn) != nid))
  			continue;
  
  		if (zone && zone != page_zone(pfn_to_page(pfn)))
  			continue;
  
  		return pfn;
  	}
  
  	return 0;
  }
  
  /*
   * Adjust @zone's start pfn / spanned pages after the section
   * [start_pfn, end_pfn) has been taken out of it. Runs under the zone span
   * write lock.
   */
  static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
  			     unsigned long end_pfn)
  {
  	unsigned long zone_start = zone->zone_start_pfn;
  	unsigned long zone_end = zone_end_pfn(zone);
  	unsigned long pfn;
  	int nid = zone_to_nid(zone);

  	zone_span_writelock(zone);
  	if (zone_start == start_pfn) {
  		/*
  		 * If the section is smallest section in the zone, it need
  		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
  		 * In this case, we find second smallest valid mem_section
  		 * for shrinking zone.
  		 */
  		pfn = find_smallest_section_pfn(nid, zone, end_pfn, zone_end);
  		if (pfn) {
  			zone->zone_start_pfn = pfn;
  			zone->spanned_pages = zone_end - pfn;
  		}
  	} else if (zone_end == end_pfn) {
  		/*
  		 * If the section is biggest section in the zone, it need
  		 * shrink zone->spanned_pages.
  		 * In this case, we find second biggest valid mem_section for
  		 * shrinking zone.
  		 */
  		pfn = find_biggest_section_pfn(nid, zone, zone_start,
  					       start_pfn);
  		if (pfn)
  			zone->spanned_pages = pfn - zone_start + 1;
  	}

  	/*
  	 * The section is not biggest or smallest mem_section in the zone, it
  	 * only creates a hole in the zone. So in this case, we need not
  	 * change the zone. But perhaps, the zone has only hole data. Thus
  	 * it check the zone has only hole or not.
  	 */
  	for (pfn = zone_start; pfn < zone_end; pfn += PAGES_PER_SECTION) {
  		if (unlikely(!valid_section(__pfn_to_section(pfn))))
  			continue;

  		if (page_zone(pfn_to_page(pfn)) != zone)
  			continue;

  		/* If the section is current section, it continues the loop */
  		if (pfn == start_pfn)
  			continue;

  		/* If we find valid section, we have nothing to do */
  		goto unlock;
  	}

  	/* The zone has no valid section */
  	zone->zone_start_pfn = 0;
  	zone->spanned_pages = 0;
  unlock:
  	zone_span_writeunlock(zone);
  }
  
  /*
   * Adjust @pgdat's start pfn / spanned pages after the section
   * [start_pfn, end_pfn) has been taken out of the node.
   */
  static void shrink_pgdat_span(struct pglist_data *pgdat,
  			      unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long node_start = pgdat->node_start_pfn;
  	unsigned long node_end = pgdat_end_pfn(pgdat);
  	unsigned long pfn;
  	int nid = pgdat->node_id;

  	if (node_start == start_pfn) {
  		/*
  		 * If the section is smallest section in the pgdat, it need
  		 * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
  		 * In this case, we find second smallest valid mem_section
  		 * for shrinking zone.
  		 */
  		pfn = find_smallest_section_pfn(nid, NULL, end_pfn, node_end);
  		if (pfn) {
  			pgdat->node_start_pfn = pfn;
  			pgdat->node_spanned_pages = node_end - pfn;
  		}
  	} else if (node_end == end_pfn) {
  		/*
  		 * If the section is biggest section in the pgdat, it need
  		 * shrink pgdat->node_spanned_pages.
  		 * In this case, we find second biggest valid mem_section for
  		 * shrinking zone.
  		 */
  		pfn = find_biggest_section_pfn(nid, NULL, node_start,
  					       start_pfn);
  		if (pfn)
  			pgdat->node_spanned_pages = pfn - node_start + 1;
  	}

  	/*
  	 * If the section is not biggest or smallest mem_section in the pgdat,
  	 * it only creates a hole in the pgdat. So in this case, we need not
  	 * change the pgdat.
  	 * But perhaps, the pgdat has only hole data. Thus it check the pgdat
  	 * has only hole or not.
  	 */
  	for (pfn = node_start; pfn < node_end; pfn += PAGES_PER_SECTION) {
  		if (unlikely(!valid_section(__pfn_to_section(pfn))))
  			continue;

  		if (pfn_to_nid(pfn) != nid)
  			continue;

  		/* If the section is current section, it continues the loop */
  		if (pfn == start_pfn)
  			continue;

  		/* If we find valid section, we have nothing to do */
  		return;
  	}

  	/* The pgdat has no valid section */
  	pgdat->node_start_pfn = 0;
  	pgdat->node_spanned_pages = 0;
  }
  
  /*
   * Shrink the spans of @zone and of its node for the one section starting at
   * @start_pfn, holding the pgdat resize lock around both updates.
   */
  static void __remove_zone(struct zone *zone, unsigned long start_pfn)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int section_pages = PAGES_PER_SECTION;
  	unsigned long end_pfn = start_pfn + section_pages;
  	unsigned long flags;

  	pgdat_resize_lock(pgdat, &flags);
  	shrink_zone_span(zone, start_pfn, end_pfn);
  	shrink_pgdat_span(pgdat, start_pfn, end_pfn);
  	pgdat_resize_unlock(pgdat, &flags);
  }
4b94ffdc4   Dan Williams   x86, mm: introduc...
521
522
  static int __remove_section(struct zone *zone, struct mem_section *ms,
  		unsigned long map_offset)
ea01ea937   Badari Pulavarty   hotplug memory re...
523
  {
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
524
525
  	unsigned long start_pfn;
  	int scn_nr;
ea01ea937   Badari Pulavarty   hotplug memory re...
526
527
528
529
530
531
532
533
  	int ret = -EINVAL;
  
  	if (!valid_section(ms))
  		return ret;
  
  	ret = unregister_memory_section(ms);
  	if (ret)
  		return ret;
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
534
  	scn_nr = __section_nr(ms);
1dd2bfc86   YASUAKI ISHIMATSU   mm/memory_hotplug...
535
  	start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
815121d2b   Yasuaki Ishimatsu   memory_hotplug: c...
536
  	__remove_zone(zone, start_pfn);
4b94ffdc4   Dan Williams   x86, mm: introduc...
537
  	sparse_remove_one_section(zone, ms, map_offset);
ea01ea937   Badari Pulavarty   hotplug memory re...
538
539
  	return 0;
  }
ea01ea937   Badari Pulavarty   hotplug memory re...
540
541
542
543
544
545
546
547
548
549
550
551
552
553
  /**
   * __remove_pages() - remove sections of pages from a zone
   * @zone: zone from which pages need to be removed
   * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
   * @nr_pages: number of pages to remove (must be multiple of section size)
   *
   * Generic helper function to remove section mappings and sysfs entries
   * for the section of the memory we are removing. Caller needs to make
   * sure that pages are marked reserved and zones are adjust properly by
   * calling offline_pages().
   */
  int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
  		 unsigned long nr_pages)
  {
fe74ebb10   Toshi Kani   mm: change __remo...
554
  	unsigned long i;
4b94ffdc4   Dan Williams   x86, mm: introduc...
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
  	unsigned long map_offset = 0;
  	int sections_to_remove, ret = 0;
  
  	/* In the ZONE_DEVICE case device driver owns the memory region */
  	if (is_dev_zone(zone)) {
  		struct page *page = pfn_to_page(phys_start_pfn);
  		struct vmem_altmap *altmap;
  
  		altmap = to_vmem_altmap((unsigned long) page);
  		if (altmap)
  			map_offset = vmem_altmap_offset(altmap);
  	} else {
  		resource_size_t start, size;
  
  		start = phys_start_pfn << PAGE_SHIFT;
  		size = nr_pages * PAGE_SIZE;
  
  		ret = release_mem_region_adjustable(&iomem_resource, start,
  					size);
  		if (ret) {
  			resource_size_t endres = start + size - 1;
  
  			pr_warn("Unable to release resource <%pa-%pa> (%d)
  ",
  					&start, &endres, ret);
  		}
  	}
ea01ea937   Badari Pulavarty   hotplug memory re...
582

7cf91a98e   Joonsoo Kim   mm/compaction: sp...
583
  	clear_zone_contiguous(zone);
ea01ea937   Badari Pulavarty   hotplug memory re...
584
585
586
587
588
  	/*
  	 * We can only remove entire sections
  	 */
  	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
  	BUG_ON(nr_pages % PAGES_PER_SECTION);
ea01ea937   Badari Pulavarty   hotplug memory re...
589
590
591
  	sections_to_remove = nr_pages / PAGES_PER_SECTION;
  	for (i = 0; i < sections_to_remove; i++) {
  		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
4b94ffdc4   Dan Williams   x86, mm: introduc...
592
593
594
  
  		ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
  		map_offset = 0;
ea01ea937   Badari Pulavarty   hotplug memory re...
595
596
597
  		if (ret)
  			break;
  	}
7cf91a98e   Joonsoo Kim   mm/compaction: sp...
598
599
  
  	set_zone_contiguous(zone);
ea01ea937   Badari Pulavarty   hotplug memory re...
600
601
  	return ret;
  }
4edd7ceff   David Rientjes   mm, hotplug: avoi...
602
  #endif /* CONFIG_MEMORY_HOTREMOVE */
ea01ea937   Badari Pulavarty   hotplug memory re...
603

9d0ad8ca4   Daniel Kiper   mm: extend memory...
604
605
606
  /*
   * Install @callback as the page onlining function.
   *
   * Only succeeds while the generic callback is still installed. Returns 0 on
   * success and -EINVAL if another callback is already registered.
   */
  int set_online_page_callback(online_page_callback_t callback)
  {
  	int rc;

  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);

  	if (online_page_callback != generic_online_page) {
  		rc = -EINVAL;
  	} else {
  		online_page_callback = callback;
  		rc = 0;
  	}

  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();

  	return rc;
  }
  EXPORT_SYMBOL_GPL(set_online_page_callback);
  
  /*
   * Put generic_online_page() back as the page onlining function.
   *
   * Only succeeds if @callback is the callback currently installed. Returns 0
   * on success and -EINVAL otherwise.
   */
  int restore_online_page_callback(online_page_callback_t callback)
  {
  	int rc;

  	get_online_mems();
  	mutex_lock(&online_page_callback_lock);

  	if (online_page_callback != callback) {
  		rc = -EINVAL;
  	} else {
  		online_page_callback = generic_online_page;
  		rc = 0;
  	}

  	mutex_unlock(&online_page_callback_lock);
  	put_online_mems();

  	return rc;
  }
  EXPORT_SYMBOL_GPL(restore_online_page_callback);
  
  /*
   * First step of generic_online_page(). The body is empty, so calling it has
   * no effect on @page.
   */
  void __online_page_set_limits(struct page *page)
  {
  }
  EXPORT_SYMBOL_GPL(__online_page_set_limits);
  
  /*
   * Second step of generic_online_page(): account one more managed page for
   * @page through adjust_managed_page_count().
   */
  void __online_page_increment_counters(struct page *page)
  {
  	adjust_managed_page_count(page, 1);
  }
  EXPORT_SYMBOL_GPL(__online_page_increment_counters);
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
648

9d0ad8ca4   Daniel Kiper   mm: extend memory...
649
650
  /*
   * Last step of generic_online_page(): pass @page to __free_reserved_page().
   */
  void __online_page_free(struct page *page)
  {
  	__free_reserved_page(page);
  }
9d0ad8ca4   Daniel Kiper   mm: extend memory...
653
654
655
656
657
658
659
660
  EXPORT_SYMBOL_GPL(__online_page_free);
  
  /*
   * Default value of online_page_callback: runs the three exported onlining
   * steps on @page in order (set limits, increment counters, free the page).
   */
  static void generic_online_page(struct page *page)
  {
  	__online_page_set_limits(page);
  	__online_page_increment_counters(page);
  	__online_page_free(page);
  }
180c06efc   Jeremy Fitzhardinge   hotplug-memory: m...
661

75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
662
663
  /*
   * Online the pages in [start_pfn, start_pfn + nr_pages).
   *
   * If the first page of the range is reserved, every page in the range is
   * passed to online_page_callback and counted. The sections covering the
   * range are then marked online. @arg points to an unsigned long running
   * total of onlined pages, which is updated in place. Always returns 0.
   */
  static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
  			void *arg)
  {
  	unsigned long *total = arg;
  	unsigned long count = *total;
  	unsigned long idx;

  	if (PageReserved(pfn_to_page(start_pfn))) {
  		for (idx = 0; idx < nr_pages; idx++) {
  			online_page_callback(pfn_to_page(start_pfn + idx));
  			count++;
  		}
  	}

  	online_mem_sections(start_pfn, start_pfn + nr_pages);

  	*total = count;
  	return 0;
  }
d9713679d   Lai Jiangshan   memory_hotplug: f...
680
681
682
683
684
685
686
687
  /*
   * Check which node_states entries will change when memory is onlined.
   *
   * @nr_pages: not used by this function
   * @zone:     zone the memory is being onlined into
   * @arg:      notifier argument; status_change_nid_normal,
   *            status_change_nid_high and status_change_nid are each set to
   *            the node id if the corresponding state will change, else to -1
   */
  static void node_states_check_changes_online(unsigned long nr_pages,
  	struct zone *zone, struct memory_notify *arg)
  {
  	int nid = zone_to_nid(zone);
  	enum zone_type zone_last = ZONE_NORMAL;
  
  	/*
6715ddf94   Lai Jiangshan   hotplug: update n...
688
689
690
  	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_NORMAL,
  	 * set zone_last to ZONE_NORMAL.
d9713679d   Lai Jiangshan   memory_hotplug: f...
691
  	 *
6715ddf94   Lai Jiangshan   hotplug: update n...
692
693
694
  	 * If we don't have HIGHMEM nor movable node,
  	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
  	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
d9713679d   Lai Jiangshan   memory_hotplug: f...
695
  	 */
6715ddf94   Lai Jiangshan   hotplug: update n...
696
  	if (N_MEMORY == N_NORMAL_MEMORY)
d9713679d   Lai Jiangshan   memory_hotplug: f...
697
698
699
700
701
702
703
704
705
706
707
708
  		zone_last = ZONE_MOVABLE;
  
  	/*
  	 * If the memory to be onlined is in a zone of 0...zone_last, and
  	 * the zones of 0...zone_last don't have memory before online, we will
  	 * need to set the node in node_states[N_NORMAL_MEMORY] after
  	 * the memory is online.
  	 */
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_NORMAL_MEMORY))
  		arg->status_change_nid_normal = nid;
  	else
  		arg->status_change_nid_normal = -1;
6715ddf94   Lai Jiangshan   hotplug: update n...
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
  #ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE,
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	if (zone_idx(zone) <= zone_last && !node_state(nid, N_HIGH_MEMORY))
  		arg->status_change_nid_high = nid;
  	else
  		arg->status_change_nid_high = -1;
  #else
  	/* Without CONFIG_HIGHMEM the "high" result mirrors the "normal" one. */
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif
d9713679d   Lai Jiangshan   memory_hotplug: f...
730
731
  	/*
  	 * If the node doesn't have memory before online, we will need to
6715ddf94   Lai Jiangshan   hotplug: update n...
732
  	 * set the node in node_states[N_MEMORY] after the memory
d9713679d   Lai Jiangshan   memory_hotplug: f...
733
734
  	 * is online.
  	 */
6715ddf94   Lai Jiangshan   hotplug: update n...
735
  	if (!node_state(nid, N_MEMORY))
d9713679d   Lai Jiangshan   memory_hotplug: f...
736
737
738
739
740
741
742
743
744
  		arg->status_change_nid = nid;
  	else
  		arg->status_change_nid = -1;
  }
  
  /*
   * Apply the node state changes recorded in @arg to @node.
   *
   * N_NORMAL_MEMORY and N_HIGH_MEMORY are set only when the matching
   * status_change_nid_* field is non-negative; N_MEMORY is set
   * unconditionally.
   */
  static void node_states_set_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_set_state(node, N_NORMAL_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
745
746
747
748
  	if (arg->status_change_nid_high >= 0)
  		node_set_state(node, N_HIGH_MEMORY);
  
  	node_set_state(node, N_MEMORY);
d9713679d   Lai Jiangshan   memory_hotplug: f...
749
  }
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
  /*
   * Grow @zone's span so that it also covers [start_pfn, start_pfn + nr_pages).
   * The span never shrinks: the new end is the larger of the previous end and
   * the end of the added range.
   */
  static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
  		unsigned long nr_pages)
  {
  	/* Sample the current end before zone_start_pfn is possibly moved. */
  	unsigned long prev_end = zone_end_pfn(zone);
  	unsigned long span_end = start_pfn + nr_pages;

  	if (span_end < prev_end)
  		span_end = prev_end;

  	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
  		zone->zone_start_pfn = start_pfn;

  	zone->spanned_pages = span_end - zone->zone_start_pfn;
  }
  
  /*
   * Grow @pgdat's span so that it also covers
   * [start_pfn, start_pfn + nr_pages). The span never shrinks: the new end is
   * the larger of the previous end and the end of the added range.
   */
  static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
  		unsigned long nr_pages)
  {
  	/* Sample the current end before node_start_pfn is possibly moved. */
  	unsigned long prev_end = pgdat_end_pfn(pgdat);
  	unsigned long span_end = start_pfn + nr_pages;

  	if (span_end < prev_end)
  		span_end = prev_end;

  	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
  		pgdat->node_start_pfn = start_pfn;

  	pgdat->node_spanned_pages = span_end - pgdat->node_start_pfn;
  }
cdf72f250   Michal Hocko   mm, memory_hotplu...
771
  /*
   * Associate the pfn range [start_pfn, start_pfn + nr_pages) with @zone.
   *
   * Initialises the zone first if it is currently empty, widens the zone and
   * node spans to include the range, and then initialises the memmap for the
   * range with MEMMAP_HOTPLUG.
   */
  void __ref move_pfn_range_to_zone(struct zone *zone,
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
772
773
774
775
776
  		unsigned long start_pfn, unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	int nid = pgdat->node_id;
  	unsigned long flags;
df429ac03   Reza Arbab   memory-hotplug: m...
777

f1dd2cd13   Michal Hocko   mm, memory_hotplu...
778
779
  	if (zone_is_empty(zone))
  		init_currently_empty_zone(zone, start_pfn, nr_pages);
df429ac03   Reza Arbab   memory-hotplug: m...
780

f1dd2cd13   Michal Hocko   mm, memory_hotplu...
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
  	/* Cleared here and re-evaluated by set_zone_contiguous() at the end. */
  	clear_zone_contiguous(zone);
  
  	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
  	pgdat_resize_lock(pgdat, &flags);
  	zone_span_writelock(zone);
  	resize_zone_range(zone, start_pfn, nr_pages);
  	zone_span_writeunlock(zone);
  	resize_pgdat_range(pgdat, start_pfn, nr_pages);
  	pgdat_resize_unlock(pgdat, &flags);
  
  	/*
  	 * TODO now we have a visible range of pages which are not associated
  	 * with their zone properly. Not nice but set_pfnblock_flags_mask
  	 * expects the zone spans the pfn range. All the pages in the range
  	 * are reserved so nobody should be touching them so we should be safe
  	 */
  	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
  
  	set_zone_contiguous(zone);
  }
  
  /*
c246a213f   Michal Hocko   mm, memory_hotplu...
803
804
805
806
   * Returns a default kernel memory zone for the given pfn range: the first
   * zone of node @nid, scanning zone indices 0..ZONE_NORMAL in order, that
   * intersects [start_pfn, start_pfn + nr_pages).
   * If no kernel zone covers this pfn range it will automatically go
   * to the ZONE_NORMAL.
   */
c6f03e290   Michal Hocko   mm, memory_hotplu...
807
  static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
c246a213f   Michal Hocko   mm, memory_hotplu...
808
809
810
811
812
813
814
815
816
817
818
819
820
821
  		unsigned long nr_pages)
  {
  	struct pglist_data *pgdat = NODE_DATA(nid);
  	int zid;
  
  	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
  		struct zone *zone = &pgdat->node_zones[zid];
  
  		if (zone_intersects(zone, start_pfn, nr_pages))
  			return zone;
  	}
  
  	/* No kernel zone intersects the range: fall back to ZONE_NORMAL. */
  	return &pgdat->node_zones[ZONE_NORMAL];
  }
c6f03e290   Michal Hocko   mm, memory_hotplu...
822
823
  /*
   * Pick the zone for a pfn range when no explicit online type decides it.
   *
   * Chooses between the default kernel zone for the range and ZONE_MOVABLE of
   * node @nid, based on which of the two currently intersects
   * [start_pfn, start_pfn + nr_pages) and on movable_node_enabled.
   */
  static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
  		unsigned long nr_pages)
e5e689302   Michal Hocko   mm, memory_hotplu...
824
  {
c6f03e290   Michal Hocko   mm, memory_hotplu...
825
826
827
828
829
  	struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
  			nr_pages);
  	struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
  	bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
  	bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
e5e689302   Michal Hocko   mm, memory_hotplu...
830
831
  
  	/*
c6f03e290   Michal Hocko   mm, memory_hotplu...
832
833
  	 * We inherit the existing zone in a simple case where zones do not
  	 * overlap in the given range
e5e689302   Michal Hocko   mm, memory_hotplu...
834
  	 */
c6f03e290   Michal Hocko   mm, memory_hotplu...
835
836
  	/* XOR: true when exactly one of the two zones intersects the range. */
  	if (in_kernel ^ in_movable)
  		return (in_kernel) ? kernel_zone : movable_zone;
9f123ab54   Michal Hocko   mm, memory_hotplu...
837

c6f03e290   Michal Hocko   mm, memory_hotplu...
838
839
840
841
842
843
  	/*
  	 * If the range doesn't belong to any zone or two zones overlap in the
  	 * given range then we use movable zone only if movable_node is
  	 * enabled because we always online to a kernel zone by default.
  	 */
  	return movable_node_enabled ? movable_zone : kernel_zone;
9f123ab54   Michal Hocko   mm, memory_hotplu...
844
  }
e5e689302   Michal Hocko   mm, memory_hotplu...
845
846
  /*
   * Select the zone a pfn range should be onlined into.
   *
   * @online_type: MMOP_ONLINE_KERNEL selects the default kernel zone,
   *               MMOP_ONLINE_MOVABLE selects ZONE_MOVABLE of node @nid, any
   *               other value defers to default_zone_for_pfn()
   * @nid:         node the range belongs to
   * @start_pfn:   first pfn of the range
   * @nr_pages:    number of pages in the range
   *
   * NOTE(review): @start_pfn is declared as plain 'unsigned' here, while
   * move_pfn_range() passes an 'unsigned long' and the helpers called below
   * take 'unsigned long'. Where 'unsigned long' is wider than 'unsigned' the
   * pfn would be truncated on the way in. Widening the parameter looks right
   * but has to be done together with the prototype - confirm and fix both.
   */
  struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
  		unsigned long nr_pages)
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
847
  {
c6f03e290   Michal Hocko   mm, memory_hotplu...
848
849
  	if (online_type == MMOP_ONLINE_KERNEL)
  		return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
850

c6f03e290   Michal Hocko   mm, memory_hotplu...
851
852
  	if (online_type == MMOP_ONLINE_MOVABLE)
  		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
df429ac03   Reza Arbab   memory-hotplug: m...
853

c6f03e290   Michal Hocko   mm, memory_hotplu...
854
  	return default_zone_for_pfn(nid, start_pfn, nr_pages);
e5e689302   Michal Hocko   mm, memory_hotplu...
855
856
857
858
859
860
861
862
863
864
865
866
  }
  
  /*
   * Associates the given pfn range with the given node and the zone appropriate
   * for the given online type.
   *
   * The zone is chosen by zone_for_pfn_range() and the range is then attached
   * to it with move_pfn_range_to_zone(). Returns the chosen zone.
   */
  static struct zone * __meminit move_pfn_range(int online_type, int nid,
  		unsigned long start_pfn, unsigned long nr_pages)
  {
  	struct zone *zone;
  
  	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
867
868
  	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
  	return zone;
df429ac03   Reza Arbab   memory-hotplug: m...
869
  }
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
870

b93e0f329   Michal Hocko   mm, memory_hotplu...
871
  /*
   * Bring the pfn range [pfn, pfn + nr_pages) online.
   *
   * @pfn:         first pfn of the range
   * @nr_pages:    number of pages in the range
   * @online_type: forwarded to move_pfn_range() to choose the target zone
   *
   * Returns 0 on success. On failure returns the error from the
   * MEM_GOING_ONLINE notifier chain or from walk_system_ram_range(), after
   * sending MEM_CANCEL_ONLINE.
   *
   * Must be protected by mem_hotplug_begin() or a device_lock
   */
511c2aba8   Lai Jiangshan   mm, memory-hotplu...
872
  int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
873
  {
aa47228a1   Cody P Schafer   memory_hotplug: u...
874
  	unsigned long flags;
3947be196   Dave Hansen   [PATCH] memory ho...
875
876
  	unsigned long onlined_pages = 0;
  	struct zone *zone;
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
877
  	int need_zonelists_rebuild = 0;
7b78d335a   Yasunori Goto   memory hotplug: r...
878
879
880
  	int nid;
  	int ret;
  	struct memory_notify arg;
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
881
  	nid = pfn_to_nid(pfn);
f1dd2cd13   Michal Hocko   mm, memory_hotplu...
882
883
  	/* associate pfn range with the zone */
  	zone = move_pfn_range(online_type, nid, pfn, nr_pages);
7b78d335a   Yasunori Goto   memory hotplug: r...
884
885
  	arg.start_pfn = pfn;
  	arg.nr_pages = nr_pages;
d9713679d   Lai Jiangshan   memory_hotplug: f...
886
  	/* Fills the status_change_nid* fields of arg for the notifiers. */
  	node_states_check_changes_online(nr_pages, zone, &arg);
7b78d335a   Yasunori Goto   memory hotplug: r...
887

7b78d335a   Yasunori Goto   memory hotplug: r...
888
889
  	ret = memory_notify(MEM_GOING_ONLINE, &arg);
  	ret = notifier_to_errno(ret);
e33e33b4d   Chen Yucong   mm, memory hotplu...
890
891
  	if (ret)
  		goto failed_addition;
3947be196   Dave Hansen   [PATCH] memory ho...
892
  	/*
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
893
894
895
896
  	 * If this zone is not populated, then it is not in zonelist.
  	 * This means the page allocator ignores this zone.
  	 * So, zonelist must be updated after online.
  	 */
6dcd73d70   Wen Congyang   memory-hotplug: a...
897
  	if (!populated_zone(zone)) {
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
898
  		need_zonelists_rebuild = 1;
72675e131   Michal Hocko   mm, memory_hotplu...
899
  		setup_zone_pageset(zone);
6dcd73d70   Wen Congyang   memory-hotplug: a...
900
  	}
6811378e7   Yasunori Goto   [PATCH] wait_tabl...
901

908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
902
  	/* online_pages_range() accumulates the page count in onlined_pages. */
  	ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
75884fb1c   KAMEZAWA Hiroyuki   memory unplug: me...
903
  		online_pages_range);
fd8a4221a   Geoff Levand   memory_hotplug: c...
904
  	if (ret) {
6dcd73d70   Wen Congyang   memory-hotplug: a...
905
906
  		/* Undo setup_zone_pageset() done above for a fresh zone. */
  		if (need_zonelists_rebuild)
  			zone_pcp_reset(zone);
e33e33b4d   Chen Yucong   mm, memory hotplu...
907
  		goto failed_addition;
fd8a4221a   Geoff Levand   memory_hotplug: c...
908
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
909
  	zone->present_pages += onlined_pages;
aa47228a1   Cody P Schafer   memory_hotplug: u...
910
911
  
  	pgdat_resize_lock(zone->zone_pgdat, &flags);
f2937be58   Yasunori Goto   [PATCH] memory ho...
912
  	zone->zone_pgdat->node_present_pages += onlined_pages;
aa47228a1   Cody P Schafer   memory_hotplug: u...
913
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);
08dff7b7d   Jiang Liu   mm/hotplug: corre...
914
  	if (onlined_pages) {
e888ca354   Vlastimil Babka   mm, memory hotplu...
915
  		node_states_set_node(nid, &arg);
08dff7b7d   Jiang Liu   mm/hotplug: corre...
916
  		if (need_zonelists_rebuild)
72675e131   Michal Hocko   mm, memory_hotplu...
917
  			build_all_zonelists(NULL);
08dff7b7d   Jiang Liu   mm/hotplug: corre...
918
919
920
  		else
  			zone_pcp_update(zone);
  	}
3947be196   Dave Hansen   [PATCH] memory ho...
921

1b79acc91   KOSAKI Motohiro   mm, mem-hotplug: ...
922
  	init_per_zone_wmark_min();
698b1b306   Vlastimil Babka   mm, compaction: i...
923
  	if (onlined_pages) {
e888ca354   Vlastimil Babka   mm, memory hotplu...
924
  		kswapd_run(nid);
698b1b306   Vlastimil Babka   mm, compaction: i...
925
926
  		kcompactd_run(nid);
  	}
61b13993a   Dave Hansen   [PATCH] memory ho...
927

1f522509c   Haicheng Li   mem-hotplug: avoi...
928
  	vm_total_pages = nr_free_pagecache_pages();
2f7f24eca   Kent Liu   memory-hotplug: d...
929

2d1d43f6a   Chandra Seetharaman   [PATCH] call mm/p...
930
  	writeback_set_ratelimit();
7b78d335a   Yasunori Goto   memory hotplug: r...
931
932
933
  
  	if (onlined_pages)
  		memory_notify(MEM_ONLINE, &arg);
30467e0b3   David Rientjes   mm, hotplug: fix ...
934
  	return 0;
e33e33b4d   Chen Yucong   mm, memory hotplu...
935
936
937
938
939
940
941
942
  
  failed_addition:
  	/*
  	 * NOTE(review): the format string below is split over two physical
  	 * lines in this view; presumably the original ends in a "\n" escape -
  	 * confirm against the tree before building from this text.
  	 */
  	pr_debug("online_pages [mem %#010llx-%#010llx] failed
  ",
  		 (unsigned long long) pfn << PAGE_SHIFT,
  		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
  	memory_notify(MEM_CANCEL_ONLINE, &arg);
  	return ret;
3947be196   Dave Hansen   [PATCH] memory ho...
943
  }
53947027a   Keith Mannthey   [PATCH] hot-add-m...
944
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
945

0bd854200   Tang Chen   mem-hotplug: rese...
946
947
948
949
950
951
952
953
954
  /*
   * Zero the present-page counter of every zone in @pgdat, then the node-wide
   * present-page counter itself.
   */
  static void reset_node_present_pages(pg_data_t *pgdat)
  {
  	int zid;

  	for (zid = 0; zid < MAX_NR_ZONES; zid++)
  		pgdat->node_zones[zid].present_pages = 0;

  	pgdat->node_present_pages = 0;
  }
e13193319   Hidetoshi Seto   mm/memory_hotplug...
955
956
  /*
   * Set up the pglist_data of node @nid for hot-added memory starting at
   * physical address @start.
   *
   * Allocates a new pgdat through arch_alloc_nodedata() when NODE_DATA(nid) is
   * NULL, otherwise reuses the existing one after resetting a few fields.
   * Returns the pgdat, or NULL if the allocation fails.
   *
   * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
   */
  static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
957
958
959
960
  {
  	struct pglist_data *pgdat;
  	unsigned long zones_size[MAX_NR_ZONES] = {0};
  	unsigned long zholes_size[MAX_NR_ZONES] = {0};
c8e861a53   Fabian Frederick   mm/memory_hotplug...
961
  	unsigned long start_pfn = PFN_DOWN(start);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
962

a1e565aa3   Tang Chen   memory-hotplug: d...
963
964
965
966
967
  	pgdat = NODE_DATA(nid);
  	if (!pgdat) {
  		pgdat = arch_alloc_nodedata(nid);
  		if (!pgdat)
  			return NULL;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
968

a1e565aa3   Tang Chen   memory-hotplug: d...
969
  		arch_refresh_nodedata(nid, pgdat);
b0dc3a342   Gu Zheng   mm/memory hotplug...
970
  	} else {
e716f2eb2   Mel Gorman   mm, vmscan: preve...
971
972
973
974
975
  		/*
  		 * Reset the nr_zones, order and classzone_idx before reuse.
  		 * Note that kswapd will init kswapd_classzone_idx properly
  		 * when it starts in the near future.
  		 */
b0dc3a342   Gu Zheng   mm/memory hotplug...
976
  		pgdat->nr_zones = 0;
38087d9b0   Mel Gorman   mm, vmscan: simpl...
977
978
  		pgdat->kswapd_order = 0;
  		pgdat->kswapd_classzone_idx = 0;
a1e565aa3   Tang Chen   memory-hotplug: d...
979
  	}
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
980
981
982
983
  
  	/* we can use NODE_DATA(nid) from here */
  
  	/* init node's zones as empty zones, we don't have any present pages.*/
9109fb7b3   Johannes Weiner   mm: drop unneeded...
984
  	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
5830169f4   Reza Arbab   mm/memory_hotplug...
985
  	/*
  	 * NOTE(review): the alloc_percpu() result is stored without a NULL
  	 * check in this function - confirm whether a failure is handled
  	 * elsewhere.
  	 */
  	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
986

959ecc48f   KAMEZAWA Hiroyuki   mm/memory_hotplug...
987
988
989
990
  	/*
  	 * The node we allocated has no zone fallback lists. To avoid
  	 * accessing an uninitialized zonelist, build it here.
  	 */
72675e131   Michal Hocko   mm, memory_hotplu...
991
  	build_all_zonelists(pgdat);
959ecc48f   KAMEZAWA Hiroyuki   mm/memory_hotplug...
992

f784a3f19   Tang Chen   mem-hotplug: rese...
993
994
995
996
997
998
999
  	/*
  	 * zone->managed_pages is set to an approximate value in
  	 * free_area_init_core(), which will cause
  	 * /sys/devices/system/node/nodeX/meminfo to show wrong data.
  	 * So reset it to 0 before any memory is onlined.
  	 */
  	reset_node_managed_pages(pgdat);
0bd854200   Tang Chen   mem-hotplug: rese...
1000
1001
1002
1003
1004
1005
  	/*
  	 * When memory is hot-added, all the memory is in offline state. So
  	 * clear all zones' present_pages because they will be updated in
  	 * online_pages() and offline_pages().
  	 */
  	reset_node_present_pages(pgdat);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1006
1007
1008
1009
1010
1011
  	return pgdat;
  }
  
  /*
   * Undo hotadd_new_pgdat() for node @nid: clear the node's registered pgdat,
   * free the per-cpu node stats, then free @pgdat itself.
   */
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
  	arch_refresh_nodedata(nid, NULL);
5830169f4   Reza Arbab   mm/memory_hotplug...
1012
  	free_percpu(pgdat->per_cpu_nodestats);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1013
1014
1015
  	arch_free_nodedata(pgdat);
  	return;
  }
0a5470390   KAMEZAWA Hiroyuki   [PATCH] register ...
1016

01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1017
1018
1019
  /**
   * try_online_node - online a node if offlined
   *
cf23422b9   minskey guo   cpu/mem hotplug: ...
1020
1021
   * called by cpu_up() to online a node without onlined memory.
   *
   * Returns 0 if the node is already online or was brought online, and
   * -ENOMEM if hotadd_new_pgdat() fails. A register_one_node() failure
   * triggers BUG_ON().
   */
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1022
  int try_online_node(int nid)
cf23422b9   minskey guo   cpu/mem hotplug: ...
1023
1024
1025
  {
  	pg_data_t	*pgdat;
  	int	ret;
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1026
1027
  	/* Checked before mem_hotplug_begin() is taken. */
  	if (node_online(nid))
  		return 0;
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1028
  	mem_hotplug_begin();
cf23422b9   minskey guo   cpu/mem hotplug: ...
1029
  	pgdat = hotadd_new_pgdat(nid, 0);
7553e8f2d   David Rientjes   mm, hotplug: fix ...
1030
  	if (!pgdat) {
01b0f1970   Toshi Kani   cpu/mem hotplug: ...
1031
1032
  		/*
  		 * NOTE(review): the message below is split over two physical
  		 * lines in this view; presumably the original ends in a "\n"
  		 * escape - confirm against the tree.
  		 */
  		pr_err("Cannot online node %d due to NULL pgdat
  ", nid);
cf23422b9   minskey guo   cpu/mem hotplug: ...
1033
1034
1035
1036
1037
1038
  		ret = -ENOMEM;
  		goto out;
  	}
  	node_set_online(nid);
  	ret = register_one_node(nid);
  	BUG_ON(ret);
cf23422b9   minskey guo   cpu/mem hotplug: ...
1039
  out:
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1040
  	mem_hotplug_done();
cf23422b9   minskey guo   cpu/mem hotplug: ...
1041
1042
  	return ret;
  }
27356f54c   Toshi Kani   mm/hotplug: verif...
1043
1044
  /*
   * Validate a physical range [start, start + size) for memory hotplug.
   *
   * Returns 0 when the start pfn is section aligned and the page count is a
   * non-zero multiple of PAGES_PER_SECTION, otherwise logs an error and
   * returns -EINVAL. A zero-length range is rejected through the same path and
   * is therefore also reported as "Section-unaligned".
   */
  static int check_hotplug_memory_range(u64 start, u64 size)
  {
c8e861a53   Fabian Frederick   mm/memory_hotplug...
1045
  	u64 start_pfn = PFN_DOWN(start);
27356f54c   Toshi Kani   mm/hotplug: verif...
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
  	u64 nr_pages = size >> PAGE_SHIFT;
  
  	/* Memory range must be aligned with section */
  	/*
  	 * NOTE(review): the message below is split over two physical lines in
  	 * this view; presumably the original ends in a "\n" escape - confirm
  	 * against the tree.
  	 */
  	if ((start_pfn & ~PAGE_SECTION_MASK) ||
  	    (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
  		pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx
  ",
  				(unsigned long long)start,
  				(unsigned long long)size);
  		return -EINVAL;
  	}
  
  	return 0;
  }
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1060
1061
  /*
   * walk_memory_range() callback used by add_memory_resource(): online the
   * device of memory block @mem. @arg is not used. Returns the result of
   * device_online().
   */
  static int online_memory_block(struct memory_block *mem, void *arg)
  {
dc18d706a   Nathan Fontenot   memory-hotplug: u...
1062
  	return device_online(&mem->dev);
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1063
  }
31168481c   Al Viro   meminit section w...
1064
  /*
   * Hot-add the memory described by @res to node @nid.
   *
   * @nid:    node the memory is added to
   * @res:    resource giving the physical start and size of the range
   * @online: when true, the memory blocks of the range are also onlined
   *
   * Returns the error from check_hotplug_memory_range() for a bad range,
   * -ENOMEM when a pgdat cannot be set up for a new node, or a negative error
   * from arch_add_memory(). Otherwise returns the value left in ret by the
   * last successful step. Failures to register a new node hit BUG_ON().
   *
   * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
   */
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1065
  int __ref add_memory_resource(int nid, struct resource *res, bool online)
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1066
  {
62cedb9f1   David Vrabel   mm: memory hotplu...
1067
  	u64 start, size;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1068
  	pg_data_t *pgdat = NULL;
a1e565aa3   Tang Chen   memory-hotplug: d...
1069
1070
  	bool new_pgdat;
  	bool new_node;
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1071
  	int ret;
62cedb9f1   David Vrabel   mm: memory hotplu...
1072
1073
  	start = res->start;
  	size = resource_size(res);
27356f54c   Toshi Kani   mm/hotplug: verif...
1074
1075
1076
  	ret = check_hotplug_memory_range(start, size);
  	if (ret)
  		return ret;
a1e565aa3   Tang Chen   memory-hotplug: d...
1077
1078
1079
1080
  	{	/* Stupid hack to suppress address-never-null warning */
  		void *p = NODE_DATA(nid);
  		new_pgdat = !p;
  	}
ac13c4622   Nathan Zimmer   mm/memory_hotplug...
1081

bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1082
  	mem_hotplug_begin();
ac13c4622   Nathan Zimmer   mm/memory_hotplug...
1083

7f36e3e56   Tang Chen   memory-hotplug: a...
1084
1085
1086
1087
1088
1089
1090
  	/*
  	 * Add new range to memblock so that when hotadd_new_pgdat() is called
  	 * to allocate new pgdat, get_pfn_range_for_nid() will be able to find
  	 * this new range and calculate total pages correctly.  The range will
  	 * be removed at hot-remove time.
  	 */
  	memblock_add_node(start, size, nid);
a1e565aa3   Tang Chen   memory-hotplug: d...
1091
1092
  	new_node = !node_online(nid);
  	if (new_node) {
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1093
  		pgdat = hotadd_new_pgdat(nid, start);
6ad696d2c   Andi Kleen   mm: allow memory ...
1094
  		ret = -ENOMEM;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1095
  		if (!pgdat)
41b9e2d7e   Wen Congyang   mm/memory_hotplug...
1096
  			goto error;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1097
  	}
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1098
  	/* call arch's memory hotadd */
3d79a728f   Michal Hocko   mm, memory_hotplu...
1099
  	ret = arch_add_memory(nid, start, size, true);
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1100

9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1101
1102
  	if (ret < 0)
  		goto error;
0fc44159b   Yasunori Goto   [PATCH] Register ...
1103
  	/* we online node here. we can't roll back from here. */
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1104
  	node_set_online(nid);
a1e565aa3   Tang Chen   memory-hotplug: d...
1105
  	if (new_node) {
9037a9934   Michal Hocko   mm, memory_hotplu...
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
  		unsigned long start_pfn = start >> PAGE_SHIFT;
  		unsigned long nr_pages = size >> PAGE_SHIFT;
  
  		ret = __register_one_node(nid);
  		if (ret)
  			goto register_fail;
  
  		/*
  		 * link memory sections under this node. This is already
  		 * done when creating memory section in register_new_memory
  		 * but that depends to have the node registered so offline
  		 * nodes have to go through register_node.
  		 * TODO clean up this mess.
  		 */
  		ret = link_mem_sections(nid, start_pfn, nr_pages);
  register_fail:
0fc44159b   Yasunori Goto   [PATCH] Register ...
1122
1123
1124
1125
1126
1127
1128
  		/*
  		 * If sysfs file of new node can't create, cpu on the node
  		 * can't be hot-added. There is no rollback way now.
  		 * So, check by BUG_ON() to catch it reluctantly..
  		 */
  		BUG_ON(ret);
  	}
d96ae5309   akpm@linux-foundation.org   memory-hotplug: c...
1129
1130
  	/* create new memmap entry */
  	firmware_map_add_hotplug(start, start + size, "System RAM");
31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1131
1132
1133
1134
  	/* online pages if requested */
  	/* The walk_memory_range() result is not folded into ret. */
  	if (online)
  		walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
  				  NULL, online_memory_block);
6ad696d2c   Andi Kleen   mm: allow memory ...
1135
  	goto out;
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1136
1137
  error:
  	/* rollback pgdat allocation and others */
dbac61a3f   Gustavo A. R. Silva   mm/memory_hotplug...
1138
  	/* Only a pgdat that did not exist on entry is torn down again. */
  	if (new_pgdat && pgdat)
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1139
  		rollback_node_hotadd(nid, pgdat);
7f36e3e56   Tang Chen   memory-hotplug: a...
1140
  	memblock_remove(start, size);
9af3c2dea   Yasunori Goto   [PATCH] pgdat all...
1141

6ad696d2c   Andi Kleen   mm: allow memory ...
1142
  out:
bfc8c9013   Vladimir Davydov   mem-hotplug: impl...
1143
  	mem_hotplug_done();
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1144
1145
  	return ret;
  }
62cedb9f1   David Vrabel   mm: memory hotplu...
1146
1147
1148
1149
1150
1151
1152
1153
  EXPORT_SYMBOL_GPL(add_memory_resource);
  
  /*
   * Hot-add the physical range [start, start + size) to node @nid.
   *
   * Registers a memory resource for the range and passes it to
   * add_memory_resource() with memhp_auto_online as the online flag. The
   * resource is released again if add_memory_resource() returns a negative
   * value. Returns PTR_ERR() of the resource if registration fails, otherwise
   * the result of add_memory_resource().
   */
  int __ref add_memory(int nid, u64 start, u64 size)
  {
  	struct resource *res;
  	int ret;
  
  	res = register_memory_resource(start, size);
6f754ba4c   Vitaly Kuznetsov   memory-hotplug: d...
1154
1155
  	if (IS_ERR(res))
  		return PTR_ERR(res);
62cedb9f1   David Vrabel   mm: memory hotplu...
1156

31bc3858e   Vitaly Kuznetsov   memory-hotplug: a...
1157
  	ret = add_memory_resource(nid, res, memhp_auto_online);
62cedb9f1   David Vrabel   mm: memory hotplu...
1158
1159
1160
1161
  	if (ret < 0)
  		release_memory_resource(res);
  	return ret;
  }
bc02af93d   Yasunori Goto   [PATCH] pgdat all...
1162
  EXPORT_SYMBOL_GPL(add_memory);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1163
1164
1165
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  /*
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
   * set and the size of the free page is given by page_order(). Using this,
   * the function determines if the pageblock contains only free pages.
   * Due to buddy constraints, a free page at least the size of a pageblock will
   * be located at the start of the pageblock
   */
  /*
   * Non-zero when @page is a buddy page whose order is at least
   * pageblock_order, zero otherwise.
   */
  static inline int pageblock_free(struct page *page)
  {
  	if (!PageBuddy(page))
  		return 0;

  	return page_order(page) >= pageblock_order;
  }
  
  /*
   * Return the start of the next active pageblock after a given page.
   *
   * @page must be pageblock aligned (enforced with BUG_ON). If the pageblock
   * is free, the returned page lies just past the free buddy page; otherwise
   * it is the start of the following pageblock.
   */
  static struct page *next_active_pageblock(struct page *page)
  {
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1180
1181
  	/* Ensure the starting page is pageblock-aligned */
  	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1182
  	/* If the entire pageblock is free, move to the end of free page */
0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
1183
1184
1185
1186
1187
1188
1189
  	if (pageblock_free(page)) {
  		int order;
  		/* be careful. we don't have locks, page_order can be changed.*/
  		order = page_order(page);
  		/* Re-validate the freshly read order before trusting it. */
  		if ((order < MAX_ORDER) && (order >= pageblock_order))
  			return page + (1 << order);
  	}
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1190

0dcc48c15   KAMEZAWA Hiroyuki   memory hotplug: f...
1191
  	return page + pageblock_nr_pages;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1192
1193
1194
  }
  
  /*
   * Checks if this range of memory is likely to be hot-removable.
   *
   * Walks [start_pfn, start_pfn + nr_pages) one active pageblock at a time
   * and returns false as soon as is_pageblock_removable_nolock() rejects one;
   * returns true if none is rejected.
   */
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1195
  bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1196
  {
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1197
1198
1199
1200
1201
  	struct page *page = pfn_to_page(start_pfn);
  	struct page *end_page = page + nr_pages;
  
  	/* Check the starting page of each pageblock within the range */
  	for (; page < end_page; page = next_active_pageblock(page)) {
49ac82558   KAMEZAWA Hiroyuki   memory hotplug: u...
1202
  		if (!is_pageblock_removable_nolock(page))
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1203
  			return false;
49ac82558   KAMEZAWA Hiroyuki   memory hotplug: u...
1204
  		cond_resched();
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1205
1206
1207
  	}
  
  	/* All pageblocks in the memory block are likely to be hot-removable */
c98940f6f   Yaowei Bai   mm/memory_hotplug...
1208
  	return true;
5c755e9fd   Badari Pulavarty   memory-hotplug: a...
1209
1210
1211
  }
  
  /*
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1212
   * Confirm all pages in a range [start, end) belong to the same zone.
a96dfddbc   Toshi Kani   base/memory, hotp...
1213
   * When true, return its valid [start, end).
   *
   * Returns 1 and fills *valid_start / *valid_end when every examined page
   * is in one zone. Returns 0 when two examined pages are in different zones
   * or when no page could be examined at all; the outputs are not written in
   * that case. Only the first valid pfn of each MAX_ORDER_NR_PAGES chunk is
   * examined.
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1214
   */
a96dfddbc   Toshi Kani   base/memory, hotp...
1215
1216
  int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
  			 unsigned long *valid_start, unsigned long *valid_end)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1217
  {
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1218
  	unsigned long pfn, sec_end_pfn;
a96dfddbc   Toshi Kani   base/memory, hotp...
1219
  	/* start/end are only read after zone has been set (see the end). */
  	unsigned long start, end;
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1220
1221
1222
  	struct zone *zone = NULL;
  	struct page *page;
  	int i;
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1223
  	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1224
  	     pfn < end_pfn;
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1225
  	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1226
1227
  		/* Make sure the memory section is present first */
  		if (!present_section_nr(pfn_to_section_nr(pfn)))
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1228
  			continue;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1229
1230
1231
1232
1233
1234
1235
  		for (; pfn < sec_end_pfn && pfn < end_pfn;
  		     pfn += MAX_ORDER_NR_PAGES) {
  			i = 0;
  			/* This is just a CONFIG_HOLES_IN_ZONE check.*/
  			while ((i < MAX_ORDER_NR_PAGES) &&
  				!pfn_valid_within(pfn + i))
  				i++;
d6d8c8a48   zhong jiang   mm/memory_hotplug...
1236
  			/* No valid pfn in this chunk, or it lies past the range. */
  			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1237
1238
1239
1240
  				continue;
  			page = pfn_to_page(pfn + i);
  			if (zone && page_zone(page) != zone)
  				return 0;
a96dfddbc   Toshi Kani   base/memory, hotp...
1241
1242
  			/* First examined page: remember where the valid range begins. */
  			if (!zone)
  				start = pfn + i;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1243
  			zone = page_zone(page);
a96dfddbc   Toshi Kani   base/memory, hotp...
1244
  			end = pfn + MAX_ORDER_NR_PAGES;
5f0f2887f   Andrew Banman   mm/memory_hotplug...
1245
  		}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1246
  	}
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1247

a96dfddbc   Toshi Kani   base/memory, hotp...
1248
1249
  	if (zone) {
  		*valid_start = start;
d6d8c8a48   zhong jiang   mm/memory_hotplug...
1250
  		/* Clamp: the last chunk may extend beyond end_pfn. */
  		*valid_end = min(end, end_pfn);
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1251
  		return 1;
a96dfddbc   Toshi Kani   base/memory, hotp...
1252
  	} else {
deb88a2a1   Toshi Kani   mm/memory_hotplug...
1253
  		return 0;
a96dfddbc   Toshi Kani   base/memory, hotp...
1254
  	}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1255
1256
1257
  }
  
  /*
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1258
1259
1260
1261
   * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
   * non-lru movable pages and hugepages). We scan pfn because it's much
   * easier than scanning over linked list. This function returns the pfn
   * of the first found movable page if it's found, otherwise 0.
   *
   * Pfns for which pfn_valid() fails are skipped.
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1262
   */
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1263
  static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1264
1265
1266
1267
1268
1269
1270
1271
  {
  	unsigned long pfn;
  	struct page *page;
  	for (pfn = start; pfn < end; pfn++) {
  		if (pfn_valid(pfn)) {
  			page = pfn_to_page(pfn);
  			if (PageLRU(page))
  				return pfn;
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1272
1273
  			if (__PageMovable(page))
  				return pfn;
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1274
  			if (PageHuge(page)) {
7e1f049ef   Naoya Horiguchi   mm: hugetlb: clea...
1275
  				if (page_huge_active(page))
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1276
1277
1278
1279
1280
  					return pfn;
  				else
  					/*
  					 * Inactive hugepage: jump to its last
  					 * pfn; the loop's pfn++ then moves past it.
  					 */
  					pfn = round_up(pfn + 1,
  						1 << compound_order(page)) - 1;
  			}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1281
1282
1283
1284
  		}
  	}
  	return 0;
  }
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1285
1286
1287
  static struct page *new_node_page(struct page *page, unsigned long private,
  		int **result)
  {
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1288
  	int nid = page_to_nid(page);
231e97e2b   Li Zhong   mem-hotplug: use ...
1289
  	nodemask_t nmask = node_states[N_MEMORY];
7f252f277   Michal Hocko   mm, memory_hotplu...
1290
1291
1292
1293
1294
1295
1296
1297
1298
  
  	/*
  	 * try to allocate from a different node but reuse this node if there
  	 * are no other online nodes to be used (e.g. we are offlining a part
  	 * of the only existing node)
  	 */
  	node_clear(nid, nmask);
  	if (nodes_empty(nmask))
  		node_set(nid, nmask);
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1299

8b9132388   Michal Hocko   mm: unify new_nod...
1300
  	return new_page_nodemask(page, nid, &nmask);
394e31d2c   Xishi Qiu   mem-hotplug: allo...
1301
  }
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
  #define NR_OFFLINE_AT_ONCE_PAGES	(256)
  static int
  do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
  {
  	unsigned long pfn;
  	struct page *page;
  	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
  	int not_managed = 0;
  	int ret = 0;
  	LIST_HEAD(source);
  
  	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
  		if (!pfn_valid(pfn))
  			continue;
  		page = pfn_to_page(pfn);
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
  
  		if (PageHuge(page)) {
  			struct page *head = compound_head(page);
  			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
  			if (compound_order(head) > PFN_SECTION_SHIFT) {
  				ret = -EBUSY;
  				break;
  			}
  			if (isolate_huge_page(page, &source))
  				move_pages -= 1 << compound_order(head);
  			continue;
8135d8926   Naoya Horiguchi   mm: memory_hotplu...
1328
1329
1330
  		} else if (thp_migration_supported() && PageTransHuge(page))
  			pfn = page_to_pfn(compound_head(page))
  				+ hpage_nr_pages(page) - 1;
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1331

700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1332
  		if (!get_page_unless_zero(page))
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1333
1334
  			continue;
  		/*
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1335
1336
  		 * We can skip free pages. And we can deal with pages on
  		 * LRU and non-lru movable pages.
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1337
  		 */
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1338
1339
1340
1341
  		if (PageLRU(page))
  			ret = isolate_lru_page(page);
  		else
  			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1342
  		if (!ret) { /* Success */
700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1343
  			put_page(page);
62695a84e   Nick Piggin   vmscan: move isol...
1344
  			list_add_tail(&page->lru, &source);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1345
  			move_pages--;
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1346
1347
1348
  			if (!__PageMovable(page))
  				inc_node_page_state(page, NR_ISOLATED_ANON +
  						    page_is_file_cache(page));
6d9c285a6   KOSAKI Motohiro   mm: move inc_zone...
1349

0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1350
  		} else {
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1351
  #ifdef CONFIG_DEBUG_VM
0efadf48b   Yisheng Xie   mm/hotplug: enabl...
1352
1353
1354
  			pr_alert("failed to isolate pfn %lx
  ", pfn);
  			dump_page(page, "isolation failed");
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1355
  #endif
700c2a46e   Konstantin Khlebnikov   mem-hotplug: call...
1356
  			put_page(page);
25985edce   Lucas De Marchi   Fix common misspe...
1357
  			/* Because we don't have big zone->lock. we should
809c44497   Bob Liu   mm: do_migrate_ra...
1358
1359
1360
  			   check this again here. */
  			if (page_count(page)) {
  				not_managed++;
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1361
  				ret = -EBUSY;
809c44497   Bob Liu   mm: do_migrate_ra...
1362
1363
  				break;
  			}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1364
1365
  		}
  	}
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1366
1367
  	if (!list_empty(&source)) {
  		if (not_managed) {
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1368
  			putback_movable_pages(&source);
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1369
1370
  			goto out;
  		}
74c08f982   Minchan Kim   memory-hotplug: d...
1371

394e31d2c   Xishi Qiu   mem-hotplug: allo...
1372
1373
  		/* Allocate a new page from the nearest neighbor node */
  		ret = migrate_pages(&source, new_node_page, NULL, 0,
9c620e2bc   Hugh Dickins   mm: remove offlin...
1374
  					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
f3ab2636c   Bob Liu   mm: do_migrate_ra...
1375
  		if (ret)
c8721bbbd   Naoya Horiguchi   mm: memory-hotplu...
1376
  			putback_movable_pages(&source);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1377
  	}
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
  out:
  	return ret;
  }
  
  /*
   * remove from free_area[] and mark all as Reserved.
   */
  static int
  offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
  			void *data)
  {
  	unsigned long end = start + nr_pages;

  	/* @data is not used; the walk is never aborted from here */
  	__offline_isolated_pages(start, end);
  	return 0;
  }
  
  static void
  offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
1396
  	walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
  				offline_isolated_pages_cb);
  }
  
  /*
   * Check that all pages in the range, recorded as memory resource, are isolated.
   */
  static int
  check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
  			void *data)
  {
  	int ret;
  	long offlined = *(long *)data;
b023f4681   Wen Congyang   memory-hotplug: s...
1409
  	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages, true);
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
  	offlined = nr_pages;
  	if (!ret)
  		*(long *)data += offlined;
  	return ret;
  }
  
  static long
  check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
  {
  	long offlined = 0;
  	int ret;
908eedc61   KAMEZAWA Hiroyuki   walk system ram r...
1421
  	ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, &offlined,
0c0e61958   KAMEZAWA Hiroyuki   memory unplug: pa...
1422
1423
1424
1425
1426
  			check_pages_isolated_cb);
  	if (ret < 0)
  		offlined = (long)ret;
  	return offlined;
  }
c5320926e   Tang Chen   mem-hotplug: intr...
1427
1428
  /*
   * Handler for the "movable_node" early parameter.
   *
   * With CONFIG_HAVE_MEMBLOCK_NODE_MAP it sets movable_node_enabled;
   * without it, it only prints a warning. @p is not inspected.
   * Always returns 0.
   */
  static int __init cmdline_parse_movable_node(char *p)
  {
  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
  	movable_node_enabled = true;
  #else
  	pr_warn("movable_node parameter depends on CONFIG_HAVE_MEMBLOCK_NODE_MAP to work properly\n");
  #endif
  	return 0;
  }
  early_param("movable_node", cmdline_parse_movable_node);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1438
1439
1440
1441
1442
1443
1444
1445
1446
  /* Check which node_states entries will change when this memory is offlined. */
  static void node_states_check_changes_offline(unsigned long nr_pages,
  		struct zone *zone, struct memory_notify *arg)
  {
  	struct pglist_data *pgdat = zone->zone_pgdat;
  	unsigned long present_pages = 0;
  	enum zone_type zt, zone_last = ZONE_NORMAL;
  
  	/*
6715ddf94   Lai Jiangshan   hotplug: update n...
1447
1448
1449
  	 * If we have HIGHMEM or movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_NORMAL,
  	 * set zone_last to ZONE_NORMAL.
d9713679d   Lai Jiangshan   memory_hotplug: f...
1450
  	 *
6715ddf94   Lai Jiangshan   hotplug: update n...
1451
1452
1453
  	 * If we don't have HIGHMEM nor movable node,
  	 * node_states[N_NORMAL_MEMORY] contains nodes which have zones of
  	 * 0...ZONE_MOVABLE, set zone_last to ZONE_MOVABLE.
d9713679d   Lai Jiangshan   memory_hotplug: f...
1454
  	 */
6715ddf94   Lai Jiangshan   hotplug: update n...
1455
  	if (N_MEMORY == N_NORMAL_MEMORY)
d9713679d   Lai Jiangshan   memory_hotplug: f...
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
  		zone_last = ZONE_MOVABLE;
  
  	/*
  	 * check whether node_states[N_NORMAL_MEMORY] will be changed.
  	 * If the memory to be offline is in a zone of 0...zone_last,
  	 * and it is the last present memory, 0...zone_last will
  	 * become empty after offline , thus we can determind we will
  	 * need to clear the node from node_states[N_NORMAL_MEMORY].
  	 */
  	for (zt = 0; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
  		arg->status_change_nid_normal = zone_to_nid(zone);
  	else
  		arg->status_change_nid_normal = -1;
6715ddf94   Lai Jiangshan   hotplug: update n...
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
  #ifdef CONFIG_HIGHMEM
  	/*
  	 * If we have movable node, node_states[N_HIGH_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_HIGHMEM,
  	 * set zone_last to ZONE_HIGHMEM.
  	 *
  	 * If we don't have movable node, node_states[N_NORMAL_MEMORY]
  	 * contains nodes which have zones of 0...ZONE_MOVABLE,
  	 * set zone_last to ZONE_MOVABLE.
  	 */
  	zone_last = ZONE_HIGHMEM;
  	if (N_MEMORY == N_HIGH_MEMORY)
  		zone_last = ZONE_MOVABLE;
  
  	for (; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (zone_idx(zone) <= zone_last && nr_pages >= present_pages)
  		arg->status_change_nid_high = zone_to_nid(zone);
  	else
  		arg->status_change_nid_high = -1;
  #else
  	arg->status_change_nid_high = arg->status_change_nid_normal;
  #endif
d9713679d   Lai Jiangshan   memory_hotplug: f...
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
  	/*
  	 * node_states[N_HIGH_MEMORY] contains nodes which have 0...ZONE_MOVABLE
  	 */
  	zone_last = ZONE_MOVABLE;
  
  	/*
  	 * check whether node_states[N_HIGH_MEMORY] will be changed
  	 * If we try to offline the last present @nr_pages from the node,
  	 * we can determind we will need to clear the node from
  	 * node_states[N_HIGH_MEMORY].
  	 */
  	for (; zt <= zone_last; zt++)
  		present_pages += pgdat->node_zones[zt].present_pages;
  	if (nr_pages >= present_pages)
  		arg->status_change_nid = zone_to_nid(zone);
  	else
  		arg->status_change_nid = -1;
  }
  
  static void node_states_clear_node(int node, struct memory_notify *arg)
  {
  	if (arg->status_change_nid_normal >= 0)
  		node_clear_state(node, N_NORMAL_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
1517
1518
  	if ((N_MEMORY != N_NORMAL_MEMORY) &&
  	    (arg->status_change_nid_high >= 0))
d9713679d   Lai Jiangshan   memory_hotplug: f...
1519
  		node_clear_state(node, N_HIGH_MEMORY);
6715ddf94   Lai Jiangshan   hotplug: update n...
1520
1521
1522
1523
  
  	if ((N_MEMORY != N_HIGH_MEMORY) &&
  	    (arg->status_change_nid >= 0))
  		node_clear_state(node, N_MEMORY);
d9713679d   Lai Jiangshan   memory_hotplug: f...
1524
  }
a16cee10c   Wen Congyang   memory-hotplug: p...
1525
  /*
   * Offline the pfn range [start_pfn, end_pfn).
   *
   * @timeout is added to jiffies to form the deadline for the
   * migrate-and-retry loop.
   *
   * Returns 0 on success, otherwise:
   *   -EINVAL  range not pageblock aligned, or test_pages_in_a_zone() failed
   *   -EAGAIN  the deadline passed
   *   -EINTR   a signal is pending for the current task
   *   -EBUSY   check_pages_isolated() reported a failure
   * or the error returned by start_isolate_page_range(), by the
   * MEM_GOING_OFFLINE notifier chain, by do_migrate_range() (once the
   * retries are used up) or by dissolve_free_huge_pages().
   *
   * On every failure after the range was isolated, MEM_CANCEL_OFFLINE is
   * sent and the isolation is undone.
   */
  static int __ref __offline_pages(unsigned long start_pfn,
  		  unsigned long end_pfn, unsigned long timeout)
  {
  	unsigned long pfn, nr_pages, expire;
  	long offlined_pages;
  	int ret, drain, retry_max, node;
  	unsigned long flags;
  	unsigned long valid_start, valid_end;
  	struct zone *zone;
  	struct memory_notify arg;

  	/* at least, alignment against pageblock is necessary */
  	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
  		return -EINVAL;
  	/* This makes hotplug much easier...and readable.
  	   we assume this for now. .*/
  	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
  		return -EINVAL;

  	zone = page_zone(pfn_to_page(valid_start));
  	node = zone_to_nid(zone);
  	nr_pages = end_pfn - start_pfn;
  	/* set above range as isolated */
  	ret = start_isolate_page_range(start_pfn, end_pfn,
  				       MIGRATE_MOVABLE, true);
  	if (ret)
  		return ret;

  	arg.start_pfn = start_pfn;
  	arg.nr_pages = nr_pages;
  	node_states_check_changes_offline(nr_pages, zone, &arg);

  	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
  	ret = notifier_to_errno(ret);
  	if (ret)
  		goto failed_removal;

  	pfn = start_pfn;
  	expire = jiffies + timeout;
  	drain = 0;
  	retry_max = 5;
  repeat:
  	/* start memory hot removal */
  	ret = -EAGAIN;
  	if (time_after(jiffies, expire))
  		goto failed_removal;
  	ret = -EINTR;
  	if (signal_pending(current))
  		goto failed_removal;
  	ret = 0;
  	if (drain) {
  		lru_add_drain_all_cpuslocked();
  		cond_resched();
  		drain_all_pages(zone);
  	}

  	pfn = scan_movable_pages(start_pfn, end_pfn);
  	if (pfn) { /* We have movable pages */
  		ret = do_migrate_range(pfn, end_pfn);
  		if (!ret) {
  			drain = 1;
  			goto repeat;
  		} else {
  			/* only negative results consume a retry */
  			if (ret < 0)
  				if (--retry_max == 0)
  					goto failed_removal;
  			yield();
  			drain = 1;
  			goto repeat;
  		}
  	}
  	/* drain all zone's lru pagevec, this is asynchronous... */
  	lru_add_drain_all_cpuslocked();
  	yield();
  	/* drain pcp pages, this is synchronous. */
  	drain_all_pages(zone);
  	/*
  	 * dissolve free hugepages in the memory block before doing offlining
  	 * actually in order to make hugetlbfs's object counting consistent.
  	 */
  	ret = dissolve_free_huge_pages(start_pfn, end_pfn);
  	if (ret)
  		goto failed_removal;
  	/* check again */
  	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
  	if (offlined_pages < 0) {
  		ret = -EBUSY;
  		goto failed_removal;
  	}
  	pr_info("Offlined Pages %ld\n", offlined_pages);
  	/* Ok, all of our target is isolated.
  	   We cannot do rollback at this point. */
  	offline_isolated_pages(start_pfn, end_pfn);
  	/* reset pagetype flags and makes migrate type to be MOVABLE */
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
  	/* removal success */
  	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
  	zone->present_pages -= offlined_pages;

  	pgdat_resize_lock(zone->zone_pgdat, &flags);
  	zone->zone_pgdat->node_present_pages -= offlined_pages;
  	pgdat_resize_unlock(zone->zone_pgdat, &flags);

  	init_per_zone_wmark_min();

  	if (!populated_zone(zone)) {
  		zone_pcp_reset(zone);
  		build_all_zonelists(NULL);
  	} else
  		zone_pcp_update(zone);

  	node_states_clear_node(node, &arg);
  	/* status_change_nid >= 0: stop the node's kswapd and kcompactd */
  	if (arg.status_change_nid >= 0) {
  		kswapd_stop(node);
  		kcompactd_stop(node);
  	}

  	vm_total_pages = nr_free_pagecache_pages();
  	writeback_set_ratelimit();

  	memory_notify(MEM_OFFLINE, &arg);
  	return 0;

  failed_removal:
  	pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
  		 (unsigned long long) start_pfn << PAGE_SHIFT,
  		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
  	memory_notify(MEM_CANCEL_OFFLINE, &arg);
  	/* pushback to free area */
  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
  	return ret;
  }
71088785c   Badari Pulavarty   mm: cleanup to ma...
1658

b93e0f329   Michal Hocko   mm, memory_hotplu...
1659
  /* Must be protected by mem_hotplug_begin() or a device_lock */
a16cee10c   Wen Congyang   memory-hotplug: p...
1660
1661
1662
1663
  int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
  {
  	const unsigned long end_pfn = start_pfn + nr_pages;

  	/* 120 * HZ is passed as the timeout argument */
  	return __offline_pages(start_pfn, end_pfn, 120 * HZ);
  }
e2ff39400   Rafael J. Wysocki   ACPI / memhotplug...
1664
  #endif /* CONFIG_MEMORY_HOTREMOVE */
a16cee10c   Wen Congyang   memory-hotplug: p...
1665

bbc76be67   Wen Congyang   memory-hotplug: r...
1666
1667
1668
  /**
   * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn)
   * @start_pfn: start pfn of the memory range
e05c4bbfa   Toshi Kani   mm: walk_memory_r...
1669
   * @end_pfn: end pfn of the memory range
bbc76be67   Wen Congyang   memory-hotplug: r...
1670
1671
1672
1673
1674
1675
1676
1677
   * @arg: argument passed to func
   * @func: callback for each memory section walked
   *
   * This function walks through all present mem sections in range
   * [start_pfn, end_pfn) and call func on each mem section.
   *
   * Returns the return value of func.
   */
e2ff39400   Rafael J. Wysocki   ACPI / memhotplug...
1678
  int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
bbc76be67   Wen Congyang   memory-hotplug: r...
1679
  		void *arg, int (*func)(struct memory_block *, void *))
71088785c   Badari Pulavarty   mm: cleanup to ma...
1680
  {
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1681
1682
  	struct memory_block *mem = NULL;
  	struct mem_section *section;
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1683
1684
  	unsigned long pfn, section_nr;
  	int ret;
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		section_nr = pfn_to_section_nr(pfn);
  		if (!present_section_nr(section_nr))
  			continue;
  
  		section = __nr_to_section(section_nr);
  		/* same memblock? */
  		if (mem)
  			if ((section_nr >= mem->start_section_nr) &&
  			    (section_nr <= mem->end_section_nr))
  				continue;
  
  		mem = find_memory_block_hinted(section, mem);
  		if (!mem)
  			continue;
bbc76be67   Wen Congyang   memory-hotplug: r...
1701
  		ret = func(mem, arg);
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1702
  		if (ret) {
bbc76be67   Wen Congyang   memory-hotplug: r...
1703
1704
  			kobject_put(&mem->dev.kobj);
  			return ret;
e90bdb7f5   Wen Congyang   memory-hotplug: u...
1705
1706
1707
1708
1709
  		}
  	}
  
  	if (mem)
  		kobject_put(&mem->dev.kobj);
bbc76be67   Wen Congyang   memory-hotplug: r...
1710
1711
  	return 0;
  }
e2ff39400   Rafael J. Wysocki   ACPI / memhotplug...
1712
  #ifdef CONFIG_MEMORY_HOTREMOVE
d6de9d534   Xishi Qiu   mm/memory_hotplug...
1713
  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
bbc76be67   Wen Congyang   memory-hotplug: r...
1714
1715
  {
  	int ret = !is_memblock_offlined(mem);
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1716
1717
1718
1719
1720
  	if (unlikely(ret)) {
  		phys_addr_t beginpa, endpa;
  
  		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
  		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
756a025f0   Joe Perches   mm: coalesce spli...
1721
1722
  		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined
  ",
349daa0f9   Randy Dunlap   mm: fix memory_ho...
1723
1724
  			&beginpa, &endpa);
  	}
bbc76be67   Wen Congyang   memory-hotplug: r...
1725
1726
1727
  
  	return ret;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1728
  /*
   * Returns -EBUSY if any present cpu maps to the node described by
   * @pgdat, 0 otherwise.
   */
  static int check_cpu_on_node(pg_data_t *pgdat)
  {
  	int cpu;

  	for_each_present_cpu(cpu) {
  		if (cpu_to_node(cpu) != pgdat->node_id)
  			continue;

  		/*
  		 * a cpu of this node has not been removed yet, so the
  		 * node cannot be offlined.
  		 */
  		return -EBUSY;
  	}

  	return 0;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1743
  /*
   * With CONFIG_ACPI_NUMA, call numa_clear_node() for every possible cpu
   * that maps to the node described by @pgdat; a no-op otherwise.
   */
  static void unmap_cpu_on_node(pg_data_t *pgdat)
  {
  #ifdef CONFIG_ACPI_NUMA
  	int cpu;

  	for_each_possible_cpu(cpu) {
  		if (cpu_to_node(cpu) != pgdat->node_id)
  			continue;
  		numa_clear_node(cpu);
  	}
  #endif
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1753
  /*
   * Unmap the cpus of @pgdat's node unless check_cpu_on_node() objects.
   * Returns the result of check_cpu_on_node().
   */
  static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
  {
  	int err = check_cpu_on_node(pgdat);

  	/*
  	 * the node will be offlined when we get past the check, so the
  	 * cpu_to_node() mapping can be cleared now.
  	 */
  	if (!err)
  		unmap_cpu_on_node(pgdat);

  	return err;
  }
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1768
1769
1770
1771
1772
1773
1774
1775
  /**
   * try_offline_node
   *
   * Offline a node if all memory sections and cpus of the node are removed.
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call.
   */
90b30cdc1   Wen Congyang   memory-hotplug: e...
1776
  void try_offline_node(int nid)
60a5a19e7   Tang Chen   memory-hotplug: r...
1777
  {
d822b86a9   Wen Congyang   memory-hotplug: f...
1778
1779
1780
  	pg_data_t *pgdat = NODE_DATA(nid);
  	unsigned long start_pfn = pgdat->node_start_pfn;
  	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
60a5a19e7   Tang Chen   memory-hotplug: r...
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
  	unsigned long pfn;
  
  	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  		unsigned long section_nr = pfn_to_section_nr(pfn);
  
  		if (!present_section_nr(section_nr))
  			continue;
  
  		if (pfn_to_nid(pfn) != nid)
  			continue;
  
  		/*
  		 * some memory sections of this node are not removed, and we
  		 * can't offline node now.
  		 */
  		return;
  	}
0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1798
  	if (check_and_unmap_cpu_on_node(pgdat))
60a5a19e7   Tang Chen   memory-hotplug: r...
1799
1800
1801
1802
1803
1804
1805
1806
1807
  		return;
  
  	/*
  	 * all memory/cpu of this node are removed, we can offline this
  	 * node now.
  	 */
  	node_set_offline(nid);
  	unregister_one_node(nid);
  }
90b30cdc1   Wen Congyang   memory-hotplug: e...
1808
  EXPORT_SYMBOL(try_offline_node);
60a5a19e7   Tang Chen   memory-hotplug: r...
1809

0f1cfe9d0   Toshi Kani   mm/hotplug: remov...
1810
1811
1812
1813
1814
1815
1816
  /**
   * remove_memory
   *
   * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
   * and online/offline operations before this call, as required by
   * try_offline_node().
   */
242831eb1   Rafael J. Wysocki   Memory hotplug / ...
1817
  void __ref remove_memory(int nid, u64 start, u64 size)
  {
  	BUG_ON(check_hotplug_memory_range(start, size));

  	mem_hotplug_begin();

  	/*
  	 * Every memory block in the range has to be offline before the
  	 * memory can go away; a block that is still online triggers a BUG().
  	 */
  	if (walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
  			      check_memblock_offlined_cb))
  		BUG();

  	/* remove memmap entry */
  	firmware_map_remove(start, start + size, "System RAM");
  	memblock_free(start, size);
  	memblock_remove(start, size);

  	arch_remove_memory(start, size);

  	try_offline_node(nid);

  	mem_hotplug_done();
  }
  EXPORT_SYMBOL_GPL(remove_memory);
aba6efc47   Rafael J. Wysocki   Memory hotplug: M...
1844
  #endif /* CONFIG_MEMORY_HOTREMOVE */